8a7efd95bd712099784d2d5d9c4e5d05dc0fa4b4
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase currently reported by the progress indicator.  The first three
 * values are used as indexes into task_position_string[] in
 * print_status_check(); TASK_NOTHING makes that function return early.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	TASK_NOTHING = 3, /* have to be the last element */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementation selected by the user, see parse_check_mode().
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for an
 * unrecognized string.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	CHECK_MODE_UNKNOWN = 2,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits for inode/root checks; note that bit 0 is not assigned here. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization for extent_record::flag_block_full_backref,
 * which is a two-bit field and can therefore hold this value.
 */
enum { FLAG_UNSET = 2 };
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Back reference error bits, decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Inode error bits, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every error has its own bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable in a combined mask; it now uses the first free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Terminate the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize,
833                                          root->fs_info->sectorsize));
834         }
835 }
836
837 static void print_ref_error(int errors)
838 {
839         if (errors & REF_ERR_NO_DIR_ITEM)
840                 fprintf(stderr, ", no dir item");
841         if (errors & REF_ERR_NO_DIR_INDEX)
842                 fprintf(stderr, ", no dir index");
843         if (errors & REF_ERR_NO_INODE_REF)
844                 fprintf(stderr, ", no inode ref");
845         if (errors & REF_ERR_DUP_DIR_ITEM)
846                 fprintf(stderr, ", dup dir item");
847         if (errors & REF_ERR_DUP_DIR_INDEX)
848                 fprintf(stderr, ", dup dir index");
849         if (errors & REF_ERR_DUP_INODE_REF)
850                 fprintf(stderr, ", dup inode ref");
851         if (errors & REF_ERR_INDEX_UNMATCH)
852                 fprintf(stderr, ", index mismatch");
853         if (errors & REF_ERR_FILETYPE_UNMATCH)
854                 fprintf(stderr, ", filetype mismatch");
855         if (errors & REF_ERR_NAME_TOO_LONG)
856                 fprintf(stderr, ", name too long");
857         if (errors & REF_ERR_NO_ROOT_REF)
858                 fprintf(stderr, ", no root ref");
859         if (errors & REF_ERR_NO_ROOT_BACKREF)
860                 fprintf(stderr, ", no root backref");
861         if (errors & REF_ERR_DUP_ROOT_REF)
862                 fprintf(stderr, ", dup root ref");
863         if (errors & REF_ERR_DUP_ROOT_BACKREF)
864                 fprintf(stderr, ", dup root backref");
865         fprintf(stderr, "\n");
866 }
867
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869                                           u64 ino, int mod)
870 {
871         struct ptr_node *node;
872         struct cache_extent *cache;
873         struct inode_record *rec = NULL;
874         int ret;
875
876         cache = lookup_cache_extent(inode_cache, ino, 1);
877         if (cache) {
878                 node = container_of(cache, struct ptr_node, cache);
879                 rec = node->data;
880                 if (mod && rec->refs > 1) {
881                         node->data = clone_inode_rec(rec);
882                         if (IS_ERR(node->data))
883                                 return node->data;
884                         rec->refs--;
885                         rec = node->data;
886                 }
887         } else if (mod) {
888                 rec = calloc(1, sizeof(*rec));
889                 if (!rec)
890                         return ERR_PTR(-ENOMEM);
891                 rec->ino = ino;
892                 rec->extent_start = (u64)-1;
893                 rec->refs = 1;
894                 INIT_LIST_HEAD(&rec->backrefs);
895                 INIT_LIST_HEAD(&rec->orphan_extents);
896                 rec->holes = RB_ROOT;
897
898                 node = malloc(sizeof(*node));
899                 if (!node) {
900                         free(rec);
901                         return ERR_PTR(-ENOMEM);
902                 }
903                 node->cache.start = ino;
904                 node->cache.size = 1;
905                 node->data = rec;
906
907                 if (ino == BTRFS_FREE_INO_OBJECTID)
908                         rec->found_link = 1;
909
910                 ret = insert_cache_extent(inode_cache, &node->cache);
911                 if (ret)
912                         return ERR_PTR(-EEXIST);
913         }
914         return rec;
915 }
916
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 {
919         struct orphan_data_extent *orphan;
920
921         while (!list_empty(orphan_extents)) {
922                 orphan = list_entry(orphan_extents->next,
923                                     struct orphan_data_extent, list);
924                 list_del(&orphan->list);
925                 free(orphan);
926         }
927 }
928
929 static void free_inode_rec(struct inode_record *rec)
930 {
931         struct inode_backref *backref;
932
933         if (--rec->refs > 0)
934                 return;
935
936         while (!list_empty(&rec->backrefs)) {
937                 backref = to_inode_backref(rec->backrefs.next);
938                 list_del(&backref->list);
939                 free(backref);
940         }
941         free_orphan_data_extents(&rec->orphan_extents);
942         free_file_extent_holes(&rec->holes);
943         free(rec);
944 }
945
946 static int can_free_inode_rec(struct inode_record *rec)
947 {
948         if (!rec->errors && rec->checked && rec->found_inode_item &&
949             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
950                 return 1;
951         return 0;
952 }
953
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955                                  struct inode_record *rec)
956 {
957         struct cache_extent *cache;
958         struct inode_backref *tmp, *backref;
959         struct ptr_node *node;
960         u8 filetype;
961
962         if (!rec->found_inode_item)
963                 return;
964
965         filetype = imode_to_type(rec->imode);
966         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967                 if (backref->found_dir_item && backref->found_dir_index) {
968                         if (backref->filetype != filetype)
969                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970                         if (!backref->errors && backref->found_inode_ref &&
971                             rec->nlink == rec->found_link) {
972                                 list_del(&backref->list);
973                                 free(backref);
974                         }
975                 }
976         }
977
978         if (!rec->checked || rec->merging)
979                 return;
980
981         if (S_ISDIR(rec->imode)) {
982                 if (rec->found_size != rec->isize)
983                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984                 if (rec->found_file_extent)
985                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
986         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987                 if (rec->found_dir_item)
988                         rec->errors |= I_ERR_ODD_DIR_ITEM;
989                 if (rec->found_size != rec->nbytes)
990                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991                 if (rec->nlink > 0 && !no_holes &&
992                     (rec->extent_end < rec->isize ||
993                      first_extent_gap(&rec->holes) < rec->isize))
994                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995         }
996
997         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998                 if (rec->found_csum_item && rec->nodatasum)
999                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000                 if (rec->some_csum_missing && !rec->nodatasum)
1001                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002         }
1003
1004         BUG_ON(rec->refs != 1);
1005         if (can_free_inode_rec(rec)) {
1006                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007                 node = container_of(cache, struct ptr_node, cache);
1008                 BUG_ON(node->data != rec);
1009                 remove_cache_extent(inode_cache, &node->cache);
1010                 free(node);
1011                 free_inode_rec(rec);
1012         }
1013 }
1014
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 {
1017         struct btrfs_path path;
1018         struct btrfs_key key;
1019         int ret;
1020
1021         key.objectid = BTRFS_ORPHAN_OBJECTID;
1022         key.type = BTRFS_ORPHAN_ITEM_KEY;
1023         key.offset = ino;
1024
1025         btrfs_init_path(&path);
1026         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027         btrfs_release_path(&path);
1028         if (ret > 0)
1029                 ret = -ENOENT;
1030         return ret;
1031 }
1032
1033 static int process_inode_item(struct extent_buffer *eb,
1034                               int slot, struct btrfs_key *key,
1035                               struct shared_node *active_node)
1036 {
1037         struct inode_record *rec;
1038         struct btrfs_inode_item *item;
1039
1040         rec = active_node->current;
1041         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042         if (rec->found_inode_item) {
1043                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044                 return 1;
1045         }
1046         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047         rec->nlink = btrfs_inode_nlink(eb, item);
1048         rec->isize = btrfs_inode_size(eb, item);
1049         rec->nbytes = btrfs_inode_nbytes(eb, item);
1050         rec->imode = btrfs_inode_mode(eb, item);
1051         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052                 rec->nodatasum = 1;
1053         rec->found_inode_item = 1;
1054         if (rec->nlink == 0)
1055                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056         maybe_free_inode_rec(&active_node->inode_cache, rec);
1057         return 0;
1058 }
1059
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061                                                 const char *name,
1062                                                 int namelen, u64 dir)
1063 {
1064         struct inode_backref *backref;
1065
1066         list_for_each_entry(backref, &rec->backrefs, list) {
1067                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068                         break;
1069                 if (backref->dir != dir || backref->namelen != namelen)
1070                         continue;
1071                 if (memcmp(name, backref->name, namelen))
1072                         continue;
1073                 return backref;
1074         }
1075
1076         backref = malloc(sizeof(*backref) + namelen + 1);
1077         if (!backref)
1078                 return NULL;
1079         memset(backref, 0, sizeof(*backref));
1080         backref->dir = dir;
1081         backref->namelen = namelen;
1082         memcpy(backref->name, name, namelen);
1083         backref->name[namelen] = '\0';
1084         list_add_tail(&backref->list, &rec->backrefs);
1085         return backref;
1086 }
1087
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089                              u64 ino, u64 dir, u64 index,
1090                              const char *name, int namelen,
1091                              u8 filetype, u8 itemtype, int errors)
1092 {
1093         struct inode_record *rec;
1094         struct inode_backref *backref;
1095
1096         rec = get_inode_rec(inode_cache, ino, 1);
1097         BUG_ON(IS_ERR(rec));
1098         backref = get_inode_backref(rec, name, namelen, dir);
1099         BUG_ON(!backref);
1100         if (errors)
1101                 backref->errors |= errors;
1102         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103                 if (backref->found_dir_index)
1104                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105                 if (backref->found_inode_ref && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 if (backref->found_dir_item && backref->filetype != filetype)
1108                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109
1110                 backref->index = index;
1111                 backref->filetype = filetype;
1112                 backref->found_dir_index = 1;
1113         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114                 rec->found_link++;
1115                 if (backref->found_dir_item)
1116                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117                 if (backref->found_dir_index && backref->filetype != filetype)
1118                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119
1120                 backref->filetype = filetype;
1121                 backref->found_dir_item = 1;
1122         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124                 if (backref->found_inode_ref)
1125                         backref->errors |= REF_ERR_DUP_INODE_REF;
1126                 if (backref->found_dir_index && backref->index != index)
1127                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1128                 else
1129                         backref->index = index;
1130
1131                 backref->ref_type = itemtype;
1132                 backref->found_inode_ref = 1;
1133         } else {
1134                 BUG_ON(1);
1135         }
1136
1137         maybe_free_inode_rec(inode_cache, rec);
1138         return 0;
1139 }
1140
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142                             struct cache_tree *dst_cache)
1143 {
1144         struct inode_backref *backref;
1145         u32 dir_count = 0;
1146         int ret = 0;
1147
1148         dst->merging = 1;
1149         list_for_each_entry(backref, &src->backrefs, list) {
1150                 if (backref->found_dir_index) {
1151                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1152                                         backref->index, backref->name,
1153                                         backref->namelen, backref->filetype,
1154                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1155                 }
1156                 if (backref->found_dir_item) {
1157                         dir_count++;
1158                         add_inode_backref(dst_cache, dst->ino,
1159                                         backref->dir, 0, backref->name,
1160                                         backref->namelen, backref->filetype,
1161                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1162                 }
1163                 if (backref->found_inode_ref) {
1164                         add_inode_backref(dst_cache, dst->ino,
1165                                         backref->dir, backref->index,
1166                                         backref->name, backref->namelen, 0,
1167                                         backref->ref_type, backref->errors);
1168                 }
1169         }
1170
1171         if (src->found_dir_item)
1172                 dst->found_dir_item = 1;
1173         if (src->found_file_extent)
1174                 dst->found_file_extent = 1;
1175         if (src->found_csum_item)
1176                 dst->found_csum_item = 1;
1177         if (src->some_csum_missing)
1178                 dst->some_csum_missing = 1;
1179         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1181                 if (ret < 0)
1182                         return ret;
1183         }
1184
1185         BUG_ON(src->found_link < dir_count);
1186         dst->found_link += src->found_link - dir_count;
1187         dst->found_size += src->found_size;
1188         if (src->extent_start != (u64)-1) {
1189                 if (dst->extent_start == (u64)-1) {
1190                         dst->extent_start = src->extent_start;
1191                         dst->extent_end = src->extent_end;
1192                 } else {
1193                         if (dst->extent_end > src->extent_start)
1194                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195                         else if (dst->extent_end < src->extent_start) {
1196                                 ret = add_file_extent_hole(&dst->holes,
1197                                         dst->extent_end,
1198                                         src->extent_start - dst->extent_end);
1199                         }
1200                         if (dst->extent_end < src->extent_end)
1201                                 dst->extent_end = src->extent_end;
1202                 }
1203         }
1204
1205         dst->errors |= src->errors;
1206         if (src->found_inode_item) {
1207                 if (!dst->found_inode_item) {
1208                         dst->nlink = src->nlink;
1209                         dst->isize = src->isize;
1210                         dst->nbytes = src->nbytes;
1211                         dst->imode = src->imode;
1212                         dst->nodatasum = src->nodatasum;
1213                         dst->found_inode_item = 1;
1214                 } else {
1215                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1216                 }
1217         }
1218         dst->merging = 0;
1219
1220         return 0;
1221 }
1222
1223 static int splice_shared_node(struct shared_node *src_node,
1224                               struct shared_node *dst_node)
1225 {
1226         struct cache_extent *cache;
1227         struct ptr_node *node, *ins;
1228         struct cache_tree *src, *dst;
1229         struct inode_record *rec, *conflict;
1230         u64 current_ino = 0;
1231         int splice = 0;
1232         int ret;
1233
1234         if (--src_node->refs == 0)
1235                 splice = 1;
1236         if (src_node->current)
1237                 current_ino = src_node->current->ino;
1238
1239         src = &src_node->root_cache;
1240         dst = &dst_node->root_cache;
1241 again:
1242         cache = search_cache_extent(src, 0);
1243         while (cache) {
1244                 node = container_of(cache, struct ptr_node, cache);
1245                 rec = node->data;
1246                 cache = next_cache_extent(cache);
1247
1248                 if (splice) {
1249                         remove_cache_extent(src, &node->cache);
1250                         ins = node;
1251                 } else {
1252                         ins = malloc(sizeof(*ins));
1253                         BUG_ON(!ins);
1254                         ins->cache.start = node->cache.start;
1255                         ins->cache.size = node->cache.size;
1256                         ins->data = rec;
1257                         rec->refs++;
1258                 }
1259                 ret = insert_cache_extent(dst, &ins->cache);
1260                 if (ret == -EEXIST) {
1261                         conflict = get_inode_rec(dst, rec->ino, 1);
1262                         BUG_ON(IS_ERR(conflict));
1263                         merge_inode_recs(rec, conflict, dst);
1264                         if (rec->checked) {
1265                                 conflict->checked = 1;
1266                                 if (dst_node->current == conflict)
1267                                         dst_node->current = NULL;
1268                         }
1269                         maybe_free_inode_rec(dst, conflict);
1270                         free_inode_rec(rec);
1271                         free(ins);
1272                 } else {
1273                         BUG_ON(ret);
1274                 }
1275         }
1276
1277         if (src == &src_node->root_cache) {
1278                 src = &src_node->inode_cache;
1279                 dst = &dst_node->inode_cache;
1280                 goto again;
1281         }
1282
1283         if (current_ino > 0 && (!dst_node->current ||
1284             current_ino > dst_node->current->ino)) {
1285                 if (dst_node->current) {
1286                         dst_node->current->checked = 1;
1287                         maybe_free_inode_rec(dst, dst_node->current);
1288                 }
1289                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290                 BUG_ON(IS_ERR(dst_node->current));
1291         }
1292         return 0;
1293 }
1294
1295 static void free_inode_ptr(struct cache_extent *cache)
1296 {
1297         struct ptr_node *node;
1298         struct inode_record *rec;
1299
1300         node = container_of(cache, struct ptr_node, cache);
1301         rec = node->data;
1302         free_inode_rec(rec);
1303         free(node);
1304 }
1305
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309                                             u64 bytenr)
1310 {
1311         struct cache_extent *cache;
1312         struct shared_node *node;
1313
1314         cache = lookup_cache_extent(shared, bytenr, 1);
1315         if (cache) {
1316                 node = container_of(cache, struct shared_node, cache);
1317                 return node;
1318         }
1319         return NULL;
1320 }
1321
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 {
1324         int ret;
1325         struct shared_node *node;
1326
1327         node = calloc(1, sizeof(*node));
1328         if (!node)
1329                 return -ENOMEM;
1330         node->cache.start = bytenr;
1331         node->cache.size = 1;
1332         cache_tree_init(&node->root_cache);
1333         cache_tree_init(&node->inode_cache);
1334         node->refs = refs;
1335
1336         ret = insert_cache_extent(shared, &node->cache);
1337
1338         return ret;
1339 }
1340
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342                              struct walk_control *wc, int level)
1343 {
1344         struct shared_node *node;
1345         struct shared_node *dest;
1346         int ret;
1347
1348         if (level == wc->active_node)
1349                 return 0;
1350
1351         BUG_ON(wc->active_node <= level);
1352         node = find_shared_node(&wc->shared, bytenr);
1353         if (!node) {
1354                 ret = add_shared_node(&wc->shared, bytenr, refs);
1355                 BUG_ON(ret);
1356                 node = find_shared_node(&wc->shared, bytenr);
1357                 wc->nodes[level] = node;
1358                 wc->active_node = level;
1359                 return 0;
1360         }
1361
1362         if (wc->root_level == wc->active_node &&
1363             btrfs_root_refs(&root->root_item) == 0) {
1364                 if (--node->refs == 0) {
1365                         free_inode_recs_tree(&node->root_cache);
1366                         free_inode_recs_tree(&node->inode_cache);
1367                         remove_cache_extent(&wc->shared, &node->cache);
1368                         free(node);
1369                 }
1370                 return 1;
1371         }
1372
1373         dest = wc->nodes[wc->active_node];
1374         splice_shared_node(node, dest);
1375         if (node->refs == 0) {
1376                 remove_cache_extent(&wc->shared, &node->cache);
1377                 free(node);
1378         }
1379         return 1;
1380 }
1381
1382 static int leave_shared_node(struct btrfs_root *root,
1383                              struct walk_control *wc, int level)
1384 {
1385         struct shared_node *node;
1386         struct shared_node *dest;
1387         int i;
1388
1389         if (level == wc->root_level)
1390                 return 0;
1391
1392         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1393                 if (wc->nodes[i])
1394                         break;
1395         }
1396         BUG_ON(i >= BTRFS_MAX_LEVEL);
1397
1398         node = wc->nodes[wc->active_node];
1399         wc->nodes[wc->active_node] = NULL;
1400         wc->active_node = i;
1401
1402         dest = wc->nodes[wc->active_node];
1403         if (wc->active_node < wc->root_level ||
1404             btrfs_root_refs(&root->root_item) > 0) {
1405                 BUG_ON(node->refs <= 1);
1406                 splice_shared_node(node, dest);
1407         } else {
1408                 BUG_ON(node->refs < 2);
1409                 node->refs--;
1410         }
1411         return 0;
1412 }
1413
1414 /*
1415  * Returns:
1416  * < 0 - on error
1417  * 1   - if the root with id child_root_id is a child of root parent_root_id
1418  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1419  *       has other root(s) as parent(s)
1420  * 2   - if the root child_root_id doesn't have any parent roots
1421  */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423                          u64 child_root_id)
1424 {
1425         struct btrfs_path path;
1426         struct btrfs_key key;
1427         struct extent_buffer *leaf;
1428         int has_parent = 0;
1429         int ret;
1430
1431         btrfs_init_path(&path);
1432
1433         key.objectid = parent_root_id;
1434         key.type = BTRFS_ROOT_REF_KEY;
1435         key.offset = child_root_id;
1436         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1437                                 0, 0);
1438         if (ret < 0)
1439                 return ret;
1440         btrfs_release_path(&path);
1441         if (!ret)
1442                 return 1;
1443
1444         key.objectid = child_root_id;
1445         key.type = BTRFS_ROOT_BACKREF_KEY;
1446         key.offset = 0;
1447         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1448                                 0, 0);
1449         if (ret < 0)
1450                 goto out;
1451
1452         while (1) {
1453                 leaf = path.nodes[0];
1454                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456                         if (ret)
1457                                 break;
1458                         leaf = path.nodes[0];
1459                 }
1460
1461                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462                 if (key.objectid != child_root_id ||
1463                     key.type != BTRFS_ROOT_BACKREF_KEY)
1464                         break;
1465
1466                 has_parent = 1;
1467
1468                 if (key.offset == parent_root_id) {
1469                         btrfs_release_path(&path);
1470                         return 1;
1471                 }
1472
1473                 path.slots[0]++;
1474         }
1475 out:
1476         btrfs_release_path(&path);
1477         if (ret < 0)
1478                 return ret;
1479         return has_parent ? 0 : 2;
1480 }
1481
1482 static int process_dir_item(struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (cur + sizeof(*di) + name_len > total ||
1517                     name_len > BTRFS_NAME_LEN) {
1518                         error = REF_ERR_NAME_TOO_LONG;
1519
1520                         if (cur + sizeof(*di) > total)
1521                                 break;
1522                         len = min_t(u32, total - cur - sizeof(*di),
1523                                     BTRFS_NAME_LEN);
1524                 } else {
1525                         len = name_len;
1526                         error = 0;
1527                 }
1528
1529                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530
1531                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1532                         add_inode_backref(inode_cache, location.objectid,
1533                                           key->objectid, key->offset, namebuf,
1534                                           len, filetype, key->type, error);
1535                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1536                         add_inode_backref(root_cache, location.objectid,
1537                                           key->objectid, key->offset,
1538                                           namebuf, len, filetype,
1539                                           key->type, error);
1540                 } else {
1541                         fprintf(stderr, "invalid location in dir item %u\n",
1542                                 location.type);
1543                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1544                                           key->objectid, key->offset, namebuf,
1545                                           len, filetype, key->type, error);
1546                 }
1547
1548                 len = sizeof(*di) + name_len + data_len;
1549                 di = (struct btrfs_dir_item *)((char *)di + len);
1550                 cur += len;
1551         }
1552         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1553                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1554
1555         return 0;
1556 }
1557
1558 static int process_inode_ref(struct extent_buffer *eb,
1559                              int slot, struct btrfs_key *key,
1560                              struct shared_node *active_node)
1561 {
1562         u32 total;
1563         u32 cur = 0;
1564         u32 len;
1565         u32 name_len;
1566         u64 index;
1567         int error;
1568         struct cache_tree *inode_cache;
1569         struct btrfs_inode_ref *ref;
1570         char namebuf[BTRFS_NAME_LEN];
1571
1572         inode_cache = &active_node->inode_cache;
1573
1574         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1575         total = btrfs_item_size_nr(eb, slot);
1576         while (cur < total) {
1577                 name_len = btrfs_inode_ref_name_len(eb, ref);
1578                 index = btrfs_inode_ref_index(eb, ref);
1579
1580                 /* inode_ref + namelen should not cross item boundary */
1581                 if (cur + sizeof(*ref) + name_len > total ||
1582                     name_len > BTRFS_NAME_LEN) {
1583                         if (total < cur + sizeof(*ref))
1584                                 break;
1585
1586                         /* Still try to read out the remaining part */
1587                         len = min_t(u32, total - cur - sizeof(*ref),
1588                                     BTRFS_NAME_LEN);
1589                         error = REF_ERR_NAME_TOO_LONG;
1590                 } else {
1591                         len = name_len;
1592                         error = 0;
1593                 }
1594
1595                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1596                 add_inode_backref(inode_cache, key->objectid, key->offset,
1597                                   index, namebuf, len, 0, key->type, error);
1598
1599                 len = sizeof(*ref) + name_len;
1600                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1601                 cur += len;
1602         }
1603         return 0;
1604 }
1605
1606 static int process_inode_extref(struct extent_buffer *eb,
1607                                 int slot, struct btrfs_key *key,
1608                                 struct shared_node *active_node)
1609 {
1610         u32 total;
1611         u32 cur = 0;
1612         u32 len;
1613         u32 name_len;
1614         u64 index;
1615         u64 parent;
1616         int error;
1617         struct cache_tree *inode_cache;
1618         struct btrfs_inode_extref *extref;
1619         char namebuf[BTRFS_NAME_LEN];
1620
1621         inode_cache = &active_node->inode_cache;
1622
1623         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1624         total = btrfs_item_size_nr(eb, slot);
1625         while (cur < total) {
1626                 name_len = btrfs_inode_extref_name_len(eb, extref);
1627                 index = btrfs_inode_extref_index(eb, extref);
1628                 parent = btrfs_inode_extref_parent(eb, extref);
1629                 if (name_len <= BTRFS_NAME_LEN) {
1630                         len = name_len;
1631                         error = 0;
1632                 } else {
1633                         len = BTRFS_NAME_LEN;
1634                         error = REF_ERR_NAME_TOO_LONG;
1635                 }
1636                 read_extent_buffer(eb, namebuf,
1637                                    (unsigned long)(extref + 1), len);
1638                 add_inode_backref(inode_cache, key->objectid, parent,
1639                                   index, namebuf, len, 0, key->type, error);
1640
1641                 len = sizeof(*extref) + name_len;
1642                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1643                 cur += len;
1644         }
1645         return 0;
1646
1647 }
1648
1649 static int count_csum_range(struct btrfs_root *root, u64 start,
1650                             u64 len, u64 *found)
1651 {
1652         struct btrfs_key key;
1653         struct btrfs_path path;
1654         struct extent_buffer *leaf;
1655         int ret;
1656         size_t size;
1657         *found = 0;
1658         u64 csum_end;
1659         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1660
1661         btrfs_init_path(&path);
1662
1663         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1664         key.offset = start;
1665         key.type = BTRFS_EXTENT_CSUM_KEY;
1666
1667         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1668                                 &key, &path, 0, 0);
1669         if (ret < 0)
1670                 goto out;
1671         if (ret > 0 && path.slots[0] > 0) {
1672                 leaf = path.nodes[0];
1673                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1674                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1675                     key.type == BTRFS_EXTENT_CSUM_KEY)
1676                         path.slots[0]--;
1677         }
1678
1679         while (len > 0) {
1680                 leaf = path.nodes[0];
1681                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1682                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1683                         if (ret > 0)
1684                                 break;
1685                         else if (ret < 0)
1686                                 goto out;
1687                         leaf = path.nodes[0];
1688                 }
1689
1690                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1691                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1692                     key.type != BTRFS_EXTENT_CSUM_KEY)
1693                         break;
1694
1695                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1696                 if (key.offset >= start + len)
1697                         break;
1698
1699                 if (key.offset > start)
1700                         start = key.offset;
1701
1702                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1703                 csum_end = key.offset + (size / csum_size) *
1704                            root->fs_info->sectorsize;
1705                 if (csum_end > start) {
1706                         size = min(csum_end - start, len);
1707                         len -= size;
1708                         start += size;
1709                         *found += size;
1710                 }
1711
1712                 path.slots[0]++;
1713         }
1714 out:
1715         btrfs_release_path(&path);
1716         if (ret < 0)
1717                 return ret;
1718         return 0;
1719 }
1720
1721 static int process_file_extent(struct btrfs_root *root,
1722                                 struct extent_buffer *eb,
1723                                 int slot, struct btrfs_key *key,
1724                                 struct shared_node *active_node)
1725 {
1726         struct inode_record *rec;
1727         struct btrfs_file_extent_item *fi;
1728         u64 num_bytes = 0;
1729         u64 disk_bytenr = 0;
1730         u64 extent_offset = 0;
1731         u64 mask = root->fs_info->sectorsize - 1;
1732         int extent_type;
1733         int ret;
1734
1735         rec = active_node->current;
1736         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1737         rec->found_file_extent = 1;
1738
1739         if (rec->extent_start == (u64)-1) {
1740                 rec->extent_start = key->offset;
1741                 rec->extent_end = key->offset;
1742         }
1743
1744         if (rec->extent_end > key->offset)
1745                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1746         else if (rec->extent_end < key->offset) {
1747                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1748                                            key->offset - rec->extent_end);
1749                 if (ret < 0)
1750                         return ret;
1751         }
1752
1753         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1754         extent_type = btrfs_file_extent_type(eb, fi);
1755
1756         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1757                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1758                 if (num_bytes == 0)
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 rec->found_size += num_bytes;
1761                 num_bytes = (num_bytes + mask) & ~mask;
1762         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1763                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1764                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1765                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1766                 extent_offset = btrfs_file_extent_offset(eb, fi);
1767                 if (num_bytes == 0 || (num_bytes & mask))
1768                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1769                 if (num_bytes + extent_offset >
1770                     btrfs_file_extent_ram_bytes(eb, fi))
1771                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1772                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1773                     (btrfs_file_extent_compression(eb, fi) ||
1774                      btrfs_file_extent_encryption(eb, fi) ||
1775                      btrfs_file_extent_other_encoding(eb, fi)))
1776                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777                 if (disk_bytenr > 0)
1778                         rec->found_size += num_bytes;
1779         } else {
1780                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1781         }
1782         rec->extent_end = key->offset + num_bytes;
1783
1784         /*
1785          * The data reloc tree will copy full extents into its inode and then
1786          * copy the corresponding csums.  Because the extent it copied could be
1787          * a preallocated extent that hasn't been written to yet there may be no
1788          * csums to copy, ergo we won't have csums for our file extent.  This is
1789          * ok so just don't bother checking csums if the inode belongs to the
1790          * data reloc tree.
1791          */
1792         if (disk_bytenr > 0 &&
1793             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1794                 u64 found;
1795                 if (btrfs_file_extent_compression(eb, fi))
1796                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1797                 else
1798                         disk_bytenr += extent_offset;
1799
1800                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1801                 if (ret < 0)
1802                         return ret;
1803                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1804                         if (found > 0)
1805                                 rec->found_csum_item = 1;
1806                         if (found < num_bytes)
1807                                 rec->some_csum_missing = 1;
1808                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1809                         if (found > 0)
1810                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1811                 }
1812         }
1813         return 0;
1814 }
1815
1816 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1817                             struct walk_control *wc)
1818 {
1819         struct btrfs_key key;
1820         u32 nritems;
1821         int i;
1822         int ret = 0;
1823         struct cache_tree *inode_cache;
1824         struct shared_node *active_node;
1825
1826         if (wc->root_level == wc->active_node &&
1827             btrfs_root_refs(&root->root_item) == 0)
1828                 return 0;
1829
1830         active_node = wc->nodes[wc->active_node];
1831         inode_cache = &active_node->inode_cache;
1832         nritems = btrfs_header_nritems(eb);
1833         for (i = 0; i < nritems; i++) {
1834                 btrfs_item_key_to_cpu(eb, &key, i);
1835
1836                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1837                         continue;
1838                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1839                         continue;
1840
1841                 if (active_node->current == NULL ||
1842                     active_node->current->ino < key.objectid) {
1843                         if (active_node->current) {
1844                                 active_node->current->checked = 1;
1845                                 maybe_free_inode_rec(inode_cache,
1846                                                      active_node->current);
1847                         }
1848                         active_node->current = get_inode_rec(inode_cache,
1849                                                              key.objectid, 1);
1850                         BUG_ON(IS_ERR(active_node->current));
1851                 }
1852                 switch (key.type) {
1853                 case BTRFS_DIR_ITEM_KEY:
1854                 case BTRFS_DIR_INDEX_KEY:
1855                         ret = process_dir_item(eb, i, &key, active_node);
1856                         break;
1857                 case BTRFS_INODE_REF_KEY:
1858                         ret = process_inode_ref(eb, i, &key, active_node);
1859                         break;
1860                 case BTRFS_INODE_EXTREF_KEY:
1861                         ret = process_inode_extref(eb, i, &key, active_node);
1862                         break;
1863                 case BTRFS_INODE_ITEM_KEY:
1864                         ret = process_inode_item(eb, i, &key, active_node);
1865                         break;
1866                 case BTRFS_EXTENT_DATA_KEY:
1867                         ret = process_file_extent(root, eb, i, &key,
1868                                                   active_node);
1869                         break;
1870                 default:
1871                         break;
1872                 };
1873         }
1874         return ret;
1875 }
1876
1877 struct node_refs {
1878         u64 bytenr[BTRFS_MAX_LEVEL];
1879         u64 refs[BTRFS_MAX_LEVEL];
1880         int need_check[BTRFS_MAX_LEVEL];
1881 };
1882
1883 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1884                              struct node_refs *nrefs, u64 level);
1885 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1886                             unsigned int ext_ref);
1887
1888 /*
1889  * Returns >0  Found error, not fatal, should continue
1890  * Returns <0  Fatal error, must exit the whole check
1891  * Returns 0   No errors found
1892  */
1893 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1894                                struct node_refs *nrefs, int *level, int ext_ref)
1895 {
1896         struct extent_buffer *cur = path->nodes[0];
1897         struct btrfs_key key;
1898         u64 cur_bytenr;
1899         u32 nritems;
1900         u64 first_ino = 0;
1901         int root_level = btrfs_header_level(root->node);
1902         int i;
1903         int ret = 0; /* Final return value */
1904         int err = 0; /* Positive error bitmap */
1905
1906         cur_bytenr = cur->start;
1907
1908         /* skip to first inode item or the first inode number change */
1909         nritems = btrfs_header_nritems(cur);
1910         for (i = 0; i < nritems; i++) {
1911                 btrfs_item_key_to_cpu(cur, &key, i);
1912                 if (i == 0)
1913                         first_ino = key.objectid;
1914                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1915                     (first_ino && first_ino != key.objectid))
1916                         break;
1917         }
1918         if (i == nritems) {
1919                 path->slots[0] = nritems;
1920                 return 0;
1921         }
1922         path->slots[0] = i;
1923
1924 again:
1925         err |= check_inode_item(root, path, ext_ref);
1926
1927         if (err & LAST_ITEM)
1928                 goto out;
1929
1930         /* still have inode items in thie leaf */
1931         if (cur->start == cur_bytenr)
1932                 goto again;
1933
1934         /*
1935          * we have switched to another leaf, above nodes may
1936          * have changed, here walk down the path, if a node
1937          * or leaf is shared, check whether we can skip this
1938          * node or leaf.
1939          */
1940         for (i = root_level; i >= 0; i--) {
1941                 if (path->nodes[i]->start == nrefs->bytenr[i])
1942                         continue;
1943
1944                 ret = update_nodes_refs(root,
1945                                 path->nodes[i]->start,
1946                                 nrefs, i);
1947                 if (ret)
1948                         goto out;
1949
1950                 if (!nrefs->need_check[i]) {
1951                         *level += 1;
1952                         break;
1953                 }
1954         }
1955
1956         for (i = 0; i < *level; i++) {
1957                 free_extent_buffer(path->nodes[i]);
1958                 path->nodes[i] = NULL;
1959         }
1960 out:
1961         err &= ~LAST_ITEM;
1962         if (err && !ret)
1963                 ret = err;
1964         return ret;
1965 }
1966
1967 static void reada_walk_down(struct btrfs_root *root,
1968                             struct extent_buffer *node, int slot)
1969 {
1970         u64 bytenr;
1971         u64 ptr_gen;
1972         u32 nritems;
1973         u32 blocksize;
1974         int i;
1975         int level;
1976
1977         level = btrfs_header_level(node);
1978         if (level != 1)
1979                 return;
1980
1981         nritems = btrfs_header_nritems(node);
1982         blocksize = root->fs_info->nodesize;
1983         for (i = slot; i < nritems; i++) {
1984                 bytenr = btrfs_node_blockptr(node, i);
1985                 ptr_gen = btrfs_node_ptr_generation(node, i);
1986                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1987         }
1988 }
1989
1990 /*
1991  * Check the child node/leaf by the following condition:
1992  * 1. the first item key of the node/leaf should be the same with the one
1993  *    in parent.
1994  * 2. block in parent node should match the child node/leaf.
1995  * 3. generation of parent node and child's header should be consistent.
1996  *
1997  * Or the child node/leaf pointed by the key in parent is not valid.
1998  *
1999  * We hope to check leaf owner too, but since subvol may share leaves,
2000  * which makes leaf owner check not so strong, key check should be
2001  * sufficient enough for that case.
2002  */
2003 static int check_child_node(struct extent_buffer *parent, int slot,
2004                             struct extent_buffer *child)
2005 {
2006         struct btrfs_key parent_key;
2007         struct btrfs_key child_key;
2008         int ret = 0;
2009
2010         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2011         if (btrfs_header_level(child) == 0)
2012                 btrfs_item_key_to_cpu(child, &child_key, 0);
2013         else
2014                 btrfs_node_key_to_cpu(child, &child_key, 0);
2015
2016         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2017                 ret = -EINVAL;
2018                 fprintf(stderr,
2019                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2020                         parent_key.objectid, parent_key.type, parent_key.offset,
2021                         child_key.objectid, child_key.type, child_key.offset);
2022         }
2023         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2024                 ret = -EINVAL;
2025                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2026                         btrfs_node_blockptr(parent, slot),
2027                         btrfs_header_bytenr(child));
2028         }
2029         if (btrfs_node_ptr_generation(parent, slot) !=
2030             btrfs_header_generation(child)) {
2031                 ret = -EINVAL;
2032                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2033                         btrfs_header_generation(child),
2034                         btrfs_node_ptr_generation(parent, slot));
2035         }
2036         return ret;
2037 }
2038
2039 /*
2040  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2041  * in every fs or file tree check. Here we find its all root ids, and only check
2042  * it in the fs or file tree which has the smallest root id.
2043  */
2044 static int need_check(struct btrfs_root *root, struct ulist *roots)
2045 {
2046         struct rb_node *node;
2047         struct ulist_node *u;
2048
2049         if (roots->nnodes == 1)
2050                 return 1;
2051
2052         node = rb_first(&roots->root);
2053         u = rb_entry(node, struct ulist_node, rb_node);
2054         /*
2055          * current root id is not smallest, we skip it and let it be checked
2056          * in the fs or file tree who hash the smallest root id.
2057          */
2058         if (root->objectid != u->val)
2059                 return 0;
2060
2061         return 1;
2062 }
2063
2064 /*
2065  * for a tree node or leaf, we record its reference count, so later if we still
2066  * process this node or leaf, don't need to compute its reference count again.
2067  */
2068 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2069                              struct node_refs *nrefs, u64 level)
2070 {
2071         int check, ret;
2072         u64 refs;
2073         struct ulist *roots;
2074
2075         if (nrefs->bytenr[level] != bytenr) {
2076                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2077                                        level, 1, &refs, NULL);
2078                 if (ret < 0)
2079                         return ret;
2080
2081                 nrefs->bytenr[level] = bytenr;
2082                 nrefs->refs[level] = refs;
2083                 if (refs > 1) {
2084                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2085                                                    0, &roots);
2086                         if (ret)
2087                                 return -EIO;
2088
2089                         check = need_check(root, roots);
2090                         ulist_free(roots);
2091                         nrefs->need_check[level] = check;
2092                 } else {
2093                         nrefs->need_check[level] = 1;
2094                 }
2095         }
2096
2097         return 0;
2098 }
2099
2100 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2101                           struct walk_control *wc, int *level,
2102                           struct node_refs *nrefs)
2103 {
2104         enum btrfs_tree_block_status status;
2105         u64 bytenr;
2106         u64 ptr_gen;
2107         struct extent_buffer *next;
2108         struct extent_buffer *cur;
2109         u32 blocksize;
2110         int ret, err = 0;
2111         u64 refs;
2112
2113         WARN_ON(*level < 0);
2114         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2115
2116         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2117                 refs = nrefs->refs[*level];
2118                 ret = 0;
2119         } else {
2120                 ret = btrfs_lookup_extent_info(NULL, root,
2121                                        path->nodes[*level]->start,
2122                                        *level, 1, &refs, NULL);
2123                 if (ret < 0) {
2124                         err = ret;
2125                         goto out;
2126                 }
2127                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2128                 nrefs->refs[*level] = refs;
2129         }
2130
2131         if (refs > 1) {
2132                 ret = enter_shared_node(root, path->nodes[*level]->start,
2133                                         refs, wc, *level);
2134                 if (ret > 0) {
2135                         err = ret;
2136                         goto out;
2137                 }
2138         }
2139
2140         while (*level >= 0) {
2141                 WARN_ON(*level < 0);
2142                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2143                 cur = path->nodes[*level];
2144
2145                 if (btrfs_header_level(cur) != *level)
2146                         WARN_ON(1);
2147
2148                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2149                         break;
2150                 if (*level == 0) {
2151                         ret = process_one_leaf(root, cur, wc);
2152                         if (ret < 0)
2153                                 err = ret;
2154                         break;
2155                 }
2156                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2157                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2158                 blocksize = root->fs_info->nodesize;
2159
2160                 if (bytenr == nrefs->bytenr[*level - 1]) {
2161                         refs = nrefs->refs[*level - 1];
2162                 } else {
2163                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2164                                         *level - 1, 1, &refs, NULL);
2165                         if (ret < 0) {
2166                                 refs = 0;
2167                         } else {
2168                                 nrefs->bytenr[*level - 1] = bytenr;
2169                                 nrefs->refs[*level - 1] = refs;
2170                         }
2171                 }
2172
2173                 if (refs > 1) {
2174                         ret = enter_shared_node(root, bytenr, refs,
2175                                                 wc, *level - 1);
2176                         if (ret > 0) {
2177                                 path->slots[*level]++;
2178                                 continue;
2179                         }
2180                 }
2181
2182                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2183                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2184                         free_extent_buffer(next);
2185                         reada_walk_down(root, cur, path->slots[*level]);
2186                         next = read_tree_block(root->fs_info, bytenr, blocksize,
2187                                                ptr_gen);
2188                         if (!extent_buffer_uptodate(next)) {
2189                                 struct btrfs_key node_key;
2190
2191                                 btrfs_node_key_to_cpu(path->nodes[*level],
2192                                                       &node_key,
2193                                                       path->slots[*level]);
2194                                 btrfs_add_corrupt_extent_record(root->fs_info,
2195                                                 &node_key,
2196                                                 path->nodes[*level]->start,
2197                                                 root->fs_info->nodesize,
2198                                                 *level);
2199                                 err = -EIO;
2200                                 goto out;
2201                         }
2202                 }
2203
2204                 ret = check_child_node(cur, path->slots[*level], next);
2205                 if (ret) {
2206                         free_extent_buffer(next);
2207                         err = ret;
2208                         goto out;
2209                 }
2210
2211                 if (btrfs_is_leaf(next))
2212                         status = btrfs_check_leaf(root, NULL, next);
2213                 else
2214                         status = btrfs_check_node(root, NULL, next);
2215                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2216                         free_extent_buffer(next);
2217                         err = -EIO;
2218                         goto out;
2219                 }
2220
2221                 *level = *level - 1;
2222                 free_extent_buffer(path->nodes[*level]);
2223                 path->nodes[*level] = next;
2224                 path->slots[*level] = 0;
2225         }
2226 out:
2227         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2228         return err;
2229 }
2230
2231 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2232                             unsigned int ext_ref);
2233
2234 /*
2235  * Returns >0  Found error, should continue
2236  * Returns <0  Fatal error, must exit the whole check
2237  * Returns 0   No errors found
2238  */
2239 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2240                              int *level, struct node_refs *nrefs, int ext_ref)
2241 {
2242         enum btrfs_tree_block_status status;
2243         u64 bytenr;
2244         u64 ptr_gen;
2245         struct extent_buffer *next;
2246         struct extent_buffer *cur;
2247         u32 blocksize;
2248         int ret;
2249
2250         WARN_ON(*level < 0);
2251         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2252
2253         ret = update_nodes_refs(root, path->nodes[*level]->start,
2254                                 nrefs, *level);
2255         if (ret < 0)
2256                 return ret;
2257
2258         while (*level >= 0) {
2259                 WARN_ON(*level < 0);
2260                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2261                 cur = path->nodes[*level];
2262
2263                 if (btrfs_header_level(cur) != *level)
2264                         WARN_ON(1);
2265
2266                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2267                         break;
2268                 /* Don't forgot to check leaf/node validation */
2269                 if (*level == 0) {
2270                         ret = btrfs_check_leaf(root, NULL, cur);
2271                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2272                                 ret = -EIO;
2273                                 break;
2274                         }
2275                         ret = process_one_leaf_v2(root, path, nrefs,
2276                                                   level, ext_ref);
2277                         break;
2278                 } else {
2279                         ret = btrfs_check_node(root, NULL, cur);
2280                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2281                                 ret = -EIO;
2282                                 break;
2283                         }
2284                 }
2285                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2286                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2287                 blocksize = root->fs_info->nodesize;
2288
2289                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2290                 if (ret)
2291                         break;
2292                 if (!nrefs->need_check[*level - 1]) {
2293                         path->slots[*level]++;
2294                         continue;
2295                 }
2296
2297                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2298                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2299                         free_extent_buffer(next);
2300                         reada_walk_down(root, cur, path->slots[*level]);
2301                         next = read_tree_block(root->fs_info, bytenr, blocksize,
2302                                                ptr_gen);
2303                         if (!extent_buffer_uptodate(next)) {
2304                                 struct btrfs_key node_key;
2305
2306                                 btrfs_node_key_to_cpu(path->nodes[*level],
2307                                                       &node_key,
2308                                                       path->slots[*level]);
2309                                 btrfs_add_corrupt_extent_record(root->fs_info,
2310                                                 &node_key,
2311                                                 path->nodes[*level]->start,
2312                                                 root->fs_info->nodesize,
2313                                                 *level);
2314                                 ret = -EIO;
2315                                 break;
2316                         }
2317                 }
2318
2319                 ret = check_child_node(cur, path->slots[*level], next);
2320                 if (ret < 0) 
2321                         break;
2322
2323                 if (btrfs_is_leaf(next))
2324                         status = btrfs_check_leaf(root, NULL, next);
2325                 else
2326                         status = btrfs_check_node(root, NULL, next);
2327                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2328                         free_extent_buffer(next);
2329                         ret = -EIO;
2330                         break;
2331                 }
2332
2333                 *level = *level - 1;
2334                 free_extent_buffer(path->nodes[*level]);
2335                 path->nodes[*level] = next;
2336                 path->slots[*level] = 0;
2337         }
2338         return ret;
2339 }
2340
2341 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2342                         struct walk_control *wc, int *level)
2343 {
2344         int i;
2345         struct extent_buffer *leaf;
2346
2347         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2348                 leaf = path->nodes[i];
2349                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350                         path->slots[i]++;
2351                         *level = i;
2352                         return 0;
2353                 } else {
2354                         free_extent_buffer(path->nodes[*level]);
2355                         path->nodes[*level] = NULL;
2356                         BUG_ON(*level > wc->active_node);
2357                         if (*level == wc->active_node)
2358                                 leave_shared_node(root, wc, *level);
2359                         *level = i + 1;
2360                 }
2361         }
2362         return 1;
2363 }
2364
2365 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2366                            int *level)
2367 {
2368         int i;
2369         struct extent_buffer *leaf;
2370
2371         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2372                 leaf = path->nodes[i];
2373                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2374                         path->slots[i]++;
2375                         *level = i;
2376                         return 0;
2377                 } else {
2378                         free_extent_buffer(path->nodes[*level]);
2379                         path->nodes[*level] = NULL;
2380                         *level = i + 1;
2381                 }
2382         }
2383         return 1;
2384 }
2385
2386 static int check_root_dir(struct inode_record *rec)
2387 {
2388         struct inode_backref *backref;
2389         int ret = -1;
2390
2391         if (!rec->found_inode_item || rec->errors)
2392                 goto out;
2393         if (rec->nlink != 1 || rec->found_link != 0)
2394                 goto out;
2395         if (list_empty(&rec->backrefs))
2396                 goto out;
2397         backref = to_inode_backref(rec->backrefs.next);
2398         if (!backref->found_inode_ref)
2399                 goto out;
2400         if (backref->index != 0 || backref->namelen != 2 ||
2401             memcmp(backref->name, "..", 2))
2402                 goto out;
2403         if (backref->found_dir_index || backref->found_dir_item)
2404                 goto out;
2405         ret = 0;
2406 out:
2407         return ret;
2408 }
2409
2410 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2411                               struct btrfs_root *root, struct btrfs_path *path,
2412                               struct inode_record *rec)
2413 {
2414         struct btrfs_inode_item *ei;
2415         struct btrfs_key key;
2416         int ret;
2417
2418         key.objectid = rec->ino;
2419         key.type = BTRFS_INODE_ITEM_KEY;
2420         key.offset = (u64)-1;
2421
2422         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2423         if (ret < 0)
2424                 goto out;
2425         if (ret) {
2426                 if (!path->slots[0]) {
2427                         ret = -ENOENT;
2428                         goto out;
2429                 }
2430                 path->slots[0]--;
2431                 ret = 0;
2432         }
2433         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2434         if (key.objectid != rec->ino) {
2435                 ret = -ENOENT;
2436                 goto out;
2437         }
2438
2439         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2440                             struct btrfs_inode_item);
2441         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2442         btrfs_mark_buffer_dirty(path->nodes[0]);
2443         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2444         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2445                root->root_key.objectid);
2446 out:
2447         btrfs_release_path(path);
2448         return ret;
2449 }
2450
2451 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2452                                     struct btrfs_root *root,
2453                                     struct btrfs_path *path,
2454                                     struct inode_record *rec)
2455 {
2456         int ret;
2457
2458         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2459         btrfs_release_path(path);
2460         if (!ret)
2461                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2462         return ret;
2463 }
2464
2465 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2466                                struct btrfs_root *root,
2467                                struct btrfs_path *path,
2468                                struct inode_record *rec)
2469 {
2470         struct btrfs_inode_item *ei;
2471         struct btrfs_key key;
2472         int ret = 0;
2473
2474         key.objectid = rec->ino;
2475         key.type = BTRFS_INODE_ITEM_KEY;
2476         key.offset = 0;
2477
2478         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2479         if (ret) {
2480                 if (ret > 0)
2481                         ret = -ENOENT;
2482                 goto out;
2483         }
2484
2485         /* Since ret == 0, no need to check anything */
2486         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2487                             struct btrfs_inode_item);
2488         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2489         btrfs_mark_buffer_dirty(path->nodes[0]);
2490         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2491         printf("reset nbytes for ino %llu root %llu\n",
2492                rec->ino, root->root_key.objectid);
2493 out:
2494         btrfs_release_path(path);
2495         return ret;
2496 }
2497
2498 static int add_missing_dir_index(struct btrfs_root *root,
2499                                  struct cache_tree *inode_cache,
2500                                  struct inode_record *rec,
2501                                  struct inode_backref *backref)
2502 {
2503         struct btrfs_path path;
2504         struct btrfs_trans_handle *trans;
2505         struct btrfs_dir_item *dir_item;
2506         struct extent_buffer *leaf;
2507         struct btrfs_key key;
2508         struct btrfs_disk_key disk_key;
2509         struct inode_record *dir_rec;
2510         unsigned long name_ptr;
2511         u32 data_size = sizeof(*dir_item) + backref->namelen;
2512         int ret;
2513
2514         trans = btrfs_start_transaction(root, 1);
2515         if (IS_ERR(trans))
2516                 return PTR_ERR(trans);
2517
2518         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2519                 (unsigned long long)rec->ino);
2520
2521         btrfs_init_path(&path);
2522         key.objectid = backref->dir;
2523         key.type = BTRFS_DIR_INDEX_KEY;
2524         key.offset = backref->index;
2525         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2526         BUG_ON(ret);
2527
2528         leaf = path.nodes[0];
2529         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2530
2531         disk_key.objectid = cpu_to_le64(rec->ino);
2532         disk_key.type = BTRFS_INODE_ITEM_KEY;
2533         disk_key.offset = 0;
2534
2535         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2536         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2537         btrfs_set_dir_data_len(leaf, dir_item, 0);
2538         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2539         name_ptr = (unsigned long)(dir_item + 1);
2540         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2541         btrfs_mark_buffer_dirty(leaf);
2542         btrfs_release_path(&path);
2543         btrfs_commit_transaction(trans, root);
2544
2545         backref->found_dir_index = 1;
2546         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2547         BUG_ON(IS_ERR(dir_rec));
2548         if (!dir_rec)
2549                 return 0;
2550         dir_rec->found_size += backref->namelen;
2551         if (dir_rec->found_size == dir_rec->isize &&
2552             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2553                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2554         if (dir_rec->found_size != dir_rec->isize)
2555                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2556
2557         return 0;
2558 }
2559
2560 static int delete_dir_index(struct btrfs_root *root,
2561                             struct inode_backref *backref)
2562 {
2563         struct btrfs_trans_handle *trans;
2564         struct btrfs_dir_item *di;
2565         struct btrfs_path path;
2566         int ret = 0;
2567
2568         trans = btrfs_start_transaction(root, 1);
2569         if (IS_ERR(trans))
2570                 return PTR_ERR(trans);
2571
2572         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2573                 (unsigned long long)backref->dir,
2574                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2575                 (unsigned long long)root->objectid);
2576
2577         btrfs_init_path(&path);
2578         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2579                                     backref->name, backref->namelen,
2580                                     backref->index, -1);
2581         if (IS_ERR(di)) {
2582                 ret = PTR_ERR(di);
2583                 btrfs_release_path(&path);
2584                 btrfs_commit_transaction(trans, root);
2585                 if (ret == -ENOENT)
2586                         return 0;
2587                 return ret;
2588         }
2589
2590         if (!di)
2591                 ret = btrfs_del_item(trans, root, &path);
2592         else
2593                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2594         BUG_ON(ret);
2595         btrfs_release_path(&path);
2596         btrfs_commit_transaction(trans, root);
2597         return ret;
2598 }
2599
2600 static int create_inode_item(struct btrfs_root *root,
2601                              struct inode_record *rec,
2602                              int root_dir)
2603 {
2604         struct btrfs_trans_handle *trans;
2605         struct btrfs_inode_item inode_item;
2606         time_t now = time(NULL);
2607         int ret;
2608
2609         trans = btrfs_start_transaction(root, 1);
2610         if (IS_ERR(trans)) {
2611                 ret = PTR_ERR(trans);
2612                 return ret;
2613         }
2614
2615         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2616                 "be incomplete, please check permissions and content after "
2617                 "the fsck completes.\n", (unsigned long long)root->objectid,
2618                 (unsigned long long)rec->ino);
2619
2620         memset(&inode_item, 0, sizeof(inode_item));
2621         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2622         if (root_dir)
2623                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2624         else
2625                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2626         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2627         if (rec->found_dir_item) {
2628                 if (rec->found_file_extent)
2629                         fprintf(stderr, "root %llu inode %llu has both a dir "
2630                                 "item and extents, unsure if it is a dir or a "
2631                                 "regular file so setting it as a directory\n",
2632                                 (unsigned long long)root->objectid,
2633                                 (unsigned long long)rec->ino);
2634                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2635                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2636         } else if (!rec->found_dir_item) {
2637                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2638                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2639         }
2640         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2641         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2642         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2643         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2644         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2645         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2646         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2647         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2648
2649         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2650         BUG_ON(ret);
2651         btrfs_commit_transaction(trans, root);
2652         return 0;
2653 }
2654
2655 static int repair_inode_backrefs(struct btrfs_root *root,
2656                                  struct inode_record *rec,
2657                                  struct cache_tree *inode_cache,
2658                                  int delete)
2659 {
2660         struct inode_backref *tmp, *backref;
2661         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2662         int ret = 0;
2663         int repaired = 0;
2664
2665         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2666                 if (!delete && rec->ino == root_dirid) {
2667                         if (!rec->found_inode_item) {
2668                                 ret = create_inode_item(root, rec, 1);
2669                                 if (ret)
2670                                         break;
2671                                 repaired++;
2672                         }
2673                 }
2674
2675                 /* Index 0 for root dir's are special, don't mess with it */
2676                 if (rec->ino == root_dirid && backref->index == 0)
2677                         continue;
2678
2679                 if (delete &&
2680                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2681                      (backref->found_dir_index && backref->found_inode_ref &&
2682                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2683                         ret = delete_dir_index(root, backref);
2684                         if (ret)
2685                                 break;
2686                         repaired++;
2687                         list_del(&backref->list);
2688                         free(backref);
2689                         continue;
2690                 }
2691
2692                 if (!delete && !backref->found_dir_index &&
2693                     backref->found_dir_item && backref->found_inode_ref) {
2694                         ret = add_missing_dir_index(root, inode_cache, rec,
2695                                                     backref);
2696                         if (ret)
2697                                 break;
2698                         repaired++;
2699                         if (backref->found_dir_item &&
2700                             backref->found_dir_index) {
2701                                 if (!backref->errors &&
2702                                     backref->found_inode_ref) {
2703                                         list_del(&backref->list);
2704                                         free(backref);
2705                                         continue;
2706                                 }
2707                         }
2708                 }
2709
2710                 if (!delete && (!backref->found_dir_index &&
2711                                 !backref->found_dir_item &&
2712                                 backref->found_inode_ref)) {
2713                         struct btrfs_trans_handle *trans;
2714                         struct btrfs_key location;
2715
2716                         ret = check_dir_conflict(root, backref->name,
2717                                                  backref->namelen,
2718                                                  backref->dir,
2719                                                  backref->index);
2720                         if (ret) {
2721                                 /*
2722                                  * let nlink fixing routine to handle it,
2723                                  * which can do it better.
2724                                  */
2725                                 ret = 0;
2726                                 break;
2727                         }
2728                         location.objectid = rec->ino;
2729                         location.type = BTRFS_INODE_ITEM_KEY;
2730                         location.offset = 0;
2731
2732                         trans = btrfs_start_transaction(root, 1);
2733                         if (IS_ERR(trans)) {
2734                                 ret = PTR_ERR(trans);
2735                                 break;
2736                         }
2737                         fprintf(stderr, "adding missing dir index/item pair "
2738                                 "for inode %llu\n",
2739                                 (unsigned long long)rec->ino);
2740                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2741                                                     backref->namelen,
2742                                                     backref->dir, &location,
2743                                                     imode_to_type(rec->imode),
2744                                                     backref->index);
2745                         BUG_ON(ret);
2746                         btrfs_commit_transaction(trans, root);
2747                         repaired++;
2748                 }
2749
2750                 if (!delete && (backref->found_inode_ref &&
2751                                 backref->found_dir_index &&
2752                                 backref->found_dir_item &&
2753                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2754                                 !rec->found_inode_item)) {
2755                         ret = create_inode_item(root, rec, 0);
2756                         if (ret)
2757                                 break;
2758                         repaired++;
2759                 }
2760
2761         }
2762         return ret ? ret : repaired;
2763 }
2764
2765 /*
2766  * To determine the file type for nlink/inode_item repair
2767  *
2768  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2769  * Return -ENOENT if file type is not found.
2770  */
2771 static int find_file_type(struct inode_record *rec, u8 *type)
2772 {
2773         struct inode_backref *backref;
2774
2775         /* For inode item recovered case */
2776         if (rec->found_inode_item) {
2777                 *type = imode_to_type(rec->imode);
2778                 return 0;
2779         }
2780
2781         list_for_each_entry(backref, &rec->backrefs, list) {
2782                 if (backref->found_dir_index || backref->found_dir_item) {
2783                         *type = backref->filetype;
2784                         return 0;
2785                 }
2786         }
2787         return -ENOENT;
2788 }
2789
2790 /*
2791  * To determine the file name for nlink repair
2792  *
2793  * Return 0 if file name is found, set name and namelen.
2794  * Return -ENOENT if file name is not found.
2795  */
2796 static int find_file_name(struct inode_record *rec,
2797                           char *name, int *namelen)
2798 {
2799         struct inode_backref *backref;
2800
2801         list_for_each_entry(backref, &rec->backrefs, list) {
2802                 if (backref->found_dir_index || backref->found_dir_item ||
2803                     backref->found_inode_ref) {
2804                         memcpy(name, backref->name, backref->namelen);
2805                         *namelen = backref->namelen;
2806                         return 0;
2807                 }
2808         }
2809         return -ENOENT;
2810 }
2811
2812 /* Reset the nlink of the inode to the correct one */
2813 static int reset_nlink(struct btrfs_trans_handle *trans,
2814                        struct btrfs_root *root,
2815                        struct btrfs_path *path,
2816                        struct inode_record *rec)
2817 {
2818         struct inode_backref *backref;
2819         struct inode_backref *tmp;
2820         struct btrfs_key key;
2821         struct btrfs_inode_item *inode_item;
2822         int ret = 0;
2823
2824         /* We don't believe this either, reset it and iterate backref */
2825         rec->found_link = 0;
2826
2827         /* Remove all backref including the valid ones */
2828         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2829                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2830                                    backref->index, backref->name,
2831                                    backref->namelen, 0);
2832                 if (ret < 0)
2833                         goto out;
2834
2835                 /* remove invalid backref, so it won't be added back */
2836                 if (!(backref->found_dir_index &&
2837                       backref->found_dir_item &&
2838                       backref->found_inode_ref)) {
2839                         list_del(&backref->list);
2840                         free(backref);
2841                 } else {
2842                         rec->found_link++;
2843                 }
2844         }
2845
2846         /* Set nlink to 0 */
2847         key.objectid = rec->ino;
2848         key.type = BTRFS_INODE_ITEM_KEY;
2849         key.offset = 0;
2850         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2851         if (ret < 0)
2852                 goto out;
2853         if (ret > 0) {
2854                 ret = -ENOENT;
2855                 goto out;
2856         }
2857         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2858                                     struct btrfs_inode_item);
2859         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2860         btrfs_mark_buffer_dirty(path->nodes[0]);
2861         btrfs_release_path(path);
2862
2863         /*
2864          * Add back valid inode_ref/dir_item/dir_index,
2865          * add_link() will handle the nlink inc, so new nlink must be correct
2866          */
2867         list_for_each_entry(backref, &rec->backrefs, list) {
2868                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2869                                      backref->name, backref->namelen,
2870                                      backref->filetype, &backref->index, 1);
2871                 if (ret < 0)
2872                         goto out;
2873         }
2874 out:
2875         btrfs_release_path(path);
2876         return ret;
2877 }
2878
2879 static int get_highest_inode(struct btrfs_trans_handle *trans,
2880                                 struct btrfs_root *root,
2881                                 struct btrfs_path *path,
2882                                 u64 *highest_ino)
2883 {
2884         struct btrfs_key key, found_key;
2885         int ret;
2886
2887         btrfs_init_path(path);
2888         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2889         key.offset = -1;
2890         key.type = BTRFS_INODE_ITEM_KEY;
2891         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2892         if (ret == 1) {
2893                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2894                                 path->slots[0] - 1);
2895                 *highest_ino = found_key.objectid;
2896                 ret = 0;
2897         }
2898         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2899                 ret = -EOVERFLOW;
2900         btrfs_release_path(path);
2901         return ret;
2902 }
2903
2904 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2905                                struct btrfs_root *root,
2906                                struct btrfs_path *path,
2907                                struct inode_record *rec)
2908 {
2909         char *dir_name = "lost+found";
2910         char namebuf[BTRFS_NAME_LEN] = {0};
2911         u64 lost_found_ino;
2912         u32 mode = 0700;
2913         u8 type = 0;
2914         int namelen = 0;
2915         int name_recovered = 0;
2916         int type_recovered = 0;
2917         int ret = 0;
2918
2919         /*
2920          * Get file name and type first before these invalid inode ref
2921          * are deleted by remove_all_invalid_backref()
2922          */
2923         name_recovered = !find_file_name(rec, namebuf, &namelen);
2924         type_recovered = !find_file_type(rec, &type);
2925
2926         if (!name_recovered) {
2927                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2928                        rec->ino, rec->ino);
2929                 namelen = count_digits(rec->ino);
2930                 sprintf(namebuf, "%llu", rec->ino);
2931                 name_recovered = 1;
2932         }
2933         if (!type_recovered) {
2934                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2935                        rec->ino);
2936                 type = BTRFS_FT_REG_FILE;
2937                 type_recovered = 1;
2938         }
2939
2940         ret = reset_nlink(trans, root, path, rec);
2941         if (ret < 0) {
2942                 fprintf(stderr,
2943                         "Failed to reset nlink for inode %llu: %s\n",
2944                         rec->ino, strerror(-ret));
2945                 goto out;
2946         }
2947
2948         if (rec->found_link == 0) {
2949                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2950                 if (ret < 0)
2951                         goto out;
2952                 lost_found_ino++;
2953                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2954                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2955                                   mode);
2956                 if (ret < 0) {
2957                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2958                                 dir_name, strerror(-ret));
2959                         goto out;
2960                 }
2961                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2962                                      namebuf, namelen, type, NULL, 1);
2963                 /*
2964                  * Add ".INO" suffix several times to handle case where
2965                  * "FILENAME.INO" is already taken by another file.
2966                  */
2967                 while (ret == -EEXIST) {
2968                         /*
2969                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2970                          */
2971                         if (namelen + count_digits(rec->ino) + 1 >
2972                             BTRFS_NAME_LEN) {
2973                                 ret = -EFBIG;
2974                                 goto out;
2975                         }
2976                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2977                                  ".%llu", rec->ino);
2978                         namelen += count_digits(rec->ino) + 1;
2979                         ret = btrfs_add_link(trans, root, rec->ino,
2980                                              lost_found_ino, namebuf,
2981                                              namelen, type, NULL, 1);
2982                 }
2983                 if (ret < 0) {
2984                         fprintf(stderr,
2985                                 "Failed to link the inode %llu to %s dir: %s\n",
2986                                 rec->ino, dir_name, strerror(-ret));
2987                         goto out;
2988                 }
2989                 /*
2990                  * Just increase the found_link, don't actually add the
2991                  * backref. This will make things easier and this inode
2992                  * record will be freed after the repair is done.
2993                  * So fsck will not report problem about this inode.
2994                  */
2995                 rec->found_link++;
2996                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2997                        namelen, namebuf, dir_name);
2998         }
2999         printf("Fixed the nlink of inode %llu\n", rec->ino);
3000 out:
3001         /*
3002          * Clear the flag anyway, or we will loop forever for the same inode
3003          * as it will not be removed from the bad inode list and the dead loop
3004          * happens.
3005          */
3006         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3007         btrfs_release_path(path);
3008         return ret;
3009 }
3010
3011 /*
3012  * Check if there is any normal(reg or prealloc) file extent for given
3013  * ino.
3014  * This is used to determine the file type when neither its dir_index/item or
3015  * inode_item exists.
3016  *
3017  * This will *NOT* report error, if any error happens, just consider it does
3018  * not have any normal file extent.
3019  */
3020 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3021 {
3022         struct btrfs_path path;
3023         struct btrfs_key key;
3024         struct btrfs_key found_key;
3025         struct btrfs_file_extent_item *fi;
3026         u8 type;
3027         int ret = 0;
3028
3029         btrfs_init_path(&path);
3030         key.objectid = ino;
3031         key.type = BTRFS_EXTENT_DATA_KEY;
3032         key.offset = 0;
3033
3034         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3035         if (ret < 0) {
3036                 ret = 0;
3037                 goto out;
3038         }
3039         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3040                 ret = btrfs_next_leaf(root, &path);
3041                 if (ret) {
3042                         ret = 0;
3043                         goto out;
3044                 }
3045         }
3046         while (1) {
3047                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3048                                       path.slots[0]);
3049                 if (found_key.objectid != ino ||
3050                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3051                         break;
3052                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3053                                     struct btrfs_file_extent_item);
3054                 type = btrfs_file_extent_type(path.nodes[0], fi);
3055                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3056                         ret = 1;
3057                         goto out;
3058                 }
3059         }
3060 out:
3061         btrfs_release_path(&path);
3062         return ret;
3063 }
3064
3065 static u32 btrfs_type_to_imode(u8 type)
3066 {
3067         static u32 imode_by_btrfs_type[] = {
3068                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3069                 [BTRFS_FT_DIR]          = S_IFDIR,
3070                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3071                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3072                 [BTRFS_FT_FIFO]         = S_IFIFO,
3073                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3074                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3075         };
3076
3077         return imode_by_btrfs_type[(type)];
3078 }
3079
3080 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3081                                 struct btrfs_root *root,
3082                                 struct btrfs_path *path,
3083                                 struct inode_record *rec)
3084 {
3085         u8 filetype;
3086         u32 mode = 0700;
3087         int type_recovered = 0;
3088         int ret = 0;
3089
3090         printf("Trying to rebuild inode:%llu\n", rec->ino);
3091
3092         type_recovered = !find_file_type(rec, &filetype);
3093
3094         /*
3095          * Try to determine inode type if type not found.
3096          *
3097          * For found regular file extent, it must be FILE.
3098          * For found dir_item/index, it must be DIR.
3099          *
3100          * For undetermined one, use FILE as fallback.
3101          *
3102          * TODO:
3103          * 1. If found backref(inode_index/item is already handled) to it,
3104          *    it must be DIR.
3105          *    Need new inode-inode ref structure to allow search for that.
3106          */
3107         if (!type_recovered) {
3108                 if (rec->found_file_extent &&
3109                     find_normal_file_extent(root, rec->ino)) {
3110                         type_recovered = 1;
3111                         filetype = BTRFS_FT_REG_FILE;
3112                 } else if (rec->found_dir_item) {
3113                         type_recovered = 1;
3114                         filetype = BTRFS_FT_DIR;
3115                 } else if (!list_empty(&rec->orphan_extents)) {
3116                         type_recovered = 1;
3117                         filetype = BTRFS_FT_REG_FILE;
3118                 } else{
3119                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3120                                rec->ino);
3121                         type_recovered = 1;
3122                         filetype = BTRFS_FT_REG_FILE;
3123                 }
3124         }
3125
3126         ret = btrfs_new_inode(trans, root, rec->ino,
3127                               mode | btrfs_type_to_imode(filetype));
3128         if (ret < 0)
3129                 goto out;
3130
3131         /*
3132          * Here inode rebuild is done, we only rebuild the inode item,
3133          * don't repair the nlink(like move to lost+found).
3134          * That is the job of nlink repair.
3135          *
3136          * We just fill the record and return
3137          */
3138         rec->found_dir_item = 1;
3139         rec->imode = mode | btrfs_type_to_imode(filetype);
3140         rec->nlink = 0;
3141         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3142         /* Ensure the inode_nlinks repair function will be called */
3143         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3144 out:
3145         return ret;
3146 }
3147
3148 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3149                                       struct btrfs_root *root,
3150                                       struct btrfs_path *path,
3151                                       struct inode_record *rec)
3152 {
3153         struct orphan_data_extent *orphan;
3154         struct orphan_data_extent *tmp;
3155         int ret = 0;
3156
3157         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3158                 /*
3159                  * Check for conflicting file extents
3160                  *
3161                  * Here we don't know whether the extents is compressed or not,
3162                  * so we can only assume it not compressed nor data offset,
3163                  * and use its disk_len as extent length.
3164                  */
3165                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3166                                        orphan->offset, orphan->disk_len, 0);
3167                 btrfs_release_path(path);
3168                 if (ret < 0)
3169                         goto out;
3170                 if (!ret) {
3171                         fprintf(stderr,
3172                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3173                                 orphan->disk_bytenr, orphan->disk_len);
3174                         ret = btrfs_free_extent(trans,
3175                                         root->fs_info->extent_root,
3176                                         orphan->disk_bytenr, orphan->disk_len,
3177                                         0, root->objectid, orphan->objectid,
3178                                         orphan->offset);
3179                         if (ret < 0)
3180                                 goto out;
3181                 }
3182                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3183                                 orphan->offset, orphan->disk_bytenr,
3184                                 orphan->disk_len, orphan->disk_len);
3185                 if (ret < 0)
3186                         goto out;
3187
3188                 /* Update file size info */
3189                 rec->found_size += orphan->disk_len;
3190                 if (rec->found_size == rec->nbytes)
3191                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3192
3193                 /* Update the file extent hole info too */
3194                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3195                                            orphan->disk_len);
3196                 if (ret < 0)
3197                         goto out;
3198                 if (RB_EMPTY_ROOT(&rec->holes))
3199                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3200
3201                 list_del(&orphan->list);
3202                 free(orphan);
3203         }
3204         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3205 out:
3206         return ret;
3207 }
3208
3209 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3210                                         struct btrfs_root *root,
3211                                         struct btrfs_path *path,
3212                                         struct inode_record *rec)
3213 {
3214         struct rb_node *node;
3215         struct file_extent_hole *hole;
3216         int found = 0;
3217         int ret = 0;
3218
3219         node = rb_first(&rec->holes);
3220
3221         while (node) {
3222                 found = 1;
3223                 hole = rb_entry(node, struct file_extent_hole, node);
3224                 ret = btrfs_punch_hole(trans, root, rec->ino,
3225                                        hole->start, hole->len);
3226                 if (ret < 0)
3227                         goto out;
3228                 ret = del_file_extent_hole(&rec->holes, hole->start,
3229                                            hole->len);
3230                 if (ret < 0)
3231                         goto out;
3232                 if (RB_EMPTY_ROOT(&rec->holes))
3233                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3234                 node = rb_first(&rec->holes);
3235         }
3236         /* special case for a file losing all its file extent */
3237         if (!found) {
3238                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3239                                        round_up(rec->isize,
3240                                                 root->fs_info->sectorsize));
3241                 if (ret < 0)
3242                         goto out;
3243         }
3244         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3245                rec->ino, root->objectid);
3246 out:
3247         return ret;
3248 }
3249
3250 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3251 {
3252         struct btrfs_trans_handle *trans;
3253         struct btrfs_path path;
3254         int ret = 0;
3255
3256         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3257                              I_ERR_NO_ORPHAN_ITEM |
3258                              I_ERR_LINK_COUNT_WRONG |
3259                              I_ERR_NO_INODE_ITEM |
3260                              I_ERR_FILE_EXTENT_ORPHAN |
3261                              I_ERR_FILE_EXTENT_DISCOUNT|
3262                              I_ERR_FILE_NBYTES_WRONG)))
3263                 return rec->errors;
3264
3265         /*
3266          * For nlink repair, it may create a dir and add link, so
3267          * 2 for parent(256)'s dir_index and dir_item
3268          * 2 for lost+found dir's inode_item and inode_ref
3269          * 1 for the new inode_ref of the file
3270          * 2 for lost+found dir's dir_index and dir_item for the file
3271          */
3272         trans = btrfs_start_transaction(root, 7);
3273         if (IS_ERR(trans))
3274                 return PTR_ERR(trans);
3275
3276         btrfs_init_path(&path);
3277         if (rec->errors & I_ERR_NO_INODE_ITEM)
3278                 ret = repair_inode_no_item(trans, root, &path, rec);
3279         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3280                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3281         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3282                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3283         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3284                 ret = repair_inode_isize(trans, root, &path, rec);
3285         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3286                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3287         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3288                 ret = repair_inode_nlinks(trans, root, &path, rec);
3289         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3290                 ret = repair_inode_nbytes(trans, root, &path, rec);
3291         btrfs_commit_transaction(trans, root);
3292         btrfs_release_path(&path);
3293         return ret;
3294 }
3295
3296 static int check_inode_recs(struct btrfs_root *root,
3297                             struct cache_tree *inode_cache)
3298 {
3299         struct cache_extent *cache;
3300         struct ptr_node *node;
3301         struct inode_record *rec;
3302         struct inode_backref *backref;
3303         int stage = 0;
3304         int ret = 0;
3305         int err = 0;
3306         u64 error = 0;
3307         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3308
3309         if (btrfs_root_refs(&root->root_item) == 0) {
3310                 if (!cache_tree_empty(inode_cache))
3311                         fprintf(stderr, "warning line %d\n", __LINE__);
3312                 return 0;
3313         }
3314
3315         /*
3316          * We need to repair backrefs first because we could change some of the
3317          * errors in the inode recs.
3318          *
3319          * We also need to go through and delete invalid backrefs first and then
3320          * add the correct ones second.  We do this because we may get EEXIST
3321          * when adding back the correct index because we hadn't yet deleted the
3322          * invalid index.
3323          *
3324          * For example, if we were missing a dir index then the directories
3325          * isize would be wrong, so if we fixed the isize to what we thought it
3326          * would be and then fixed the backref we'd still have a invalid fs, so
3327          * we need to add back the dir index and then check to see if the isize
3328          * is still wrong.
3329          */
3330         while (stage < 3) {
3331                 stage++;
3332                 if (stage == 3 && !err)
3333                         break;
3334
3335                 cache = search_cache_extent(inode_cache, 0);
3336                 while (repair && cache) {
3337                         node = container_of(cache, struct ptr_node, cache);
3338                         rec = node->data;
3339                         cache = next_cache_extent(cache);
3340
3341                         /* Need to free everything up and rescan */
3342                         if (stage == 3) {
3343                                 remove_cache_extent(inode_cache, &node->cache);
3344                                 free(node);
3345                                 free_inode_rec(rec);
3346                                 continue;
3347                         }
3348
3349                         if (list_empty(&rec->backrefs))
3350                                 continue;
3351
3352                         ret = repair_inode_backrefs(root, rec, inode_cache,
3353                                                     stage == 1);
3354                         if (ret < 0) {
3355                                 err = ret;
3356                                 stage = 2;
3357                                 break;
3358                         } if (ret > 0) {
3359                                 err = -EAGAIN;
3360                         }
3361                 }
3362         }
3363         if (err)
3364                 return err;
3365
3366         rec = get_inode_rec(inode_cache, root_dirid, 0);
3367         BUG_ON(IS_ERR(rec));
3368         if (rec) {
3369                 ret = check_root_dir(rec);
3370                 if (ret) {
3371                         fprintf(stderr, "root %llu root dir %llu error\n",
3372                                 (unsigned long long)root->root_key.objectid,
3373                                 (unsigned long long)root_dirid);
3374                         print_inode_error(root, rec);
3375                         error++;
3376                 }
3377         } else {
3378                 if (repair) {
3379                         struct btrfs_trans_handle *trans;
3380
3381                         trans = btrfs_start_transaction(root, 1);
3382                         if (IS_ERR(trans)) {
3383                                 err = PTR_ERR(trans);
3384                                 return err;
3385                         }
3386
3387                         fprintf(stderr,
3388                                 "root %llu missing its root dir, recreating\n",
3389                                 (unsigned long long)root->objectid);
3390
3391                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3392                         BUG_ON(ret);
3393
3394                         btrfs_commit_transaction(trans, root);
3395                         return -EAGAIN;
3396                 }
3397
3398                 fprintf(stderr, "root %llu root dir %llu not found\n",
3399                         (unsigned long long)root->root_key.objectid,
3400                         (unsigned long long)root_dirid);
3401         }
3402
3403         while (1) {
3404                 cache = search_cache_extent(inode_cache, 0);
3405                 if (!cache)
3406                         break;
3407                 node = container_of(cache, struct ptr_node, cache);
3408                 rec = node->data;
3409                 remove_cache_extent(inode_cache, &node->cache);
3410                 free(node);
3411                 if (rec->ino == root_dirid ||
3412                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3413                         free_inode_rec(rec);
3414                         continue;
3415                 }
3416
3417                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3418                         ret = check_orphan_item(root, rec->ino);
3419                         if (ret == 0)
3420                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3421                         if (can_free_inode_rec(rec)) {
3422                                 free_inode_rec(rec);
3423                                 continue;
3424                         }
3425                 }
3426
3427                 if (!rec->found_inode_item)
3428                         rec->errors |= I_ERR_NO_INODE_ITEM;
3429                 if (rec->found_link != rec->nlink)
3430                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3431                 if (repair) {
3432                         ret = try_repair_inode(root, rec);
3433                         if (ret == 0 && can_free_inode_rec(rec)) {
3434                                 free_inode_rec(rec);
3435                                 continue;
3436                         }
3437                         ret = 0;
3438                 }
3439
3440                 if (!(repair && ret == 0))
3441                         error++;
3442                 print_inode_error(root, rec);
3443                 list_for_each_entry(backref, &rec->backrefs, list) {
3444                         if (!backref->found_dir_item)
3445                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3446                         if (!backref->found_dir_index)
3447                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3448                         if (!backref->found_inode_ref)
3449                                 backref->errors |= REF_ERR_NO_INODE_REF;
3450                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3451                                 " namelen %u name %s filetype %d errors %x",
3452                                 (unsigned long long)backref->dir,
3453                                 (unsigned long long)backref->index,
3454                                 backref->namelen, backref->name,
3455                                 backref->filetype, backref->errors);
3456                         print_ref_error(backref->errors);
3457                 }
3458                 free_inode_rec(rec);
3459         }
3460         return (error > 0) ? -1 : 0;
3461 }
3462
3463 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3464                                         u64 objectid)
3465 {
3466         struct cache_extent *cache;
3467         struct root_record *rec = NULL;
3468         int ret;
3469
3470         cache = lookup_cache_extent(root_cache, objectid, 1);
3471         if (cache) {
3472                 rec = container_of(cache, struct root_record, cache);
3473         } else {
3474                 rec = calloc(1, sizeof(*rec));
3475                 if (!rec)
3476                         return ERR_PTR(-ENOMEM);
3477                 rec->objectid = objectid;
3478                 INIT_LIST_HEAD(&rec->backrefs);
3479                 rec->cache.start = objectid;
3480                 rec->cache.size = 1;
3481
3482                 ret = insert_cache_extent(root_cache, &rec->cache);
3483                 if (ret)
3484                         return ERR_PTR(-EEXIST);
3485         }
3486         return rec;
3487 }
3488
3489 static struct root_backref *get_root_backref(struct root_record *rec,
3490                                              u64 ref_root, u64 dir, u64 index,
3491                                              const char *name, int namelen)
3492 {
3493         struct root_backref *backref;
3494
3495         list_for_each_entry(backref, &rec->backrefs, list) {
3496                 if (backref->ref_root != ref_root || backref->dir != dir ||
3497                     backref->namelen != namelen)
3498                         continue;
3499                 if (memcmp(name, backref->name, namelen))
3500                         continue;
3501                 return backref;
3502         }
3503
3504         backref = calloc(1, sizeof(*backref) + namelen + 1);
3505         if (!backref)
3506                 return NULL;
3507         backref->ref_root = ref_root;
3508         backref->dir = dir;
3509         backref->index = index;
3510         backref->namelen = namelen;
3511         memcpy(backref->name, name, namelen);
3512         backref->name[namelen] = '\0';
3513         list_add_tail(&backref->list, &rec->backrefs);
3514         return backref;
3515 }
3516
3517 static void free_root_record(struct cache_extent *cache)
3518 {
3519         struct root_record *rec;
3520         struct root_backref *backref;
3521
3522         rec = container_of(cache, struct root_record, cache);
3523         while (!list_empty(&rec->backrefs)) {
3524                 backref = to_root_backref(rec->backrefs.next);
3525                 list_del(&backref->list);
3526                 free(backref);
3527         }
3528
3529         free(rec);
3530 }
3531
3532 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3533
3534 static int add_root_backref(struct cache_tree *root_cache,
3535                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3536                             const char *name, int namelen,
3537                             int item_type, int errors)
3538 {
3539         struct root_record *rec;
3540         struct root_backref *backref;
3541
3542         rec = get_root_rec(root_cache, root_id);
3543         BUG_ON(IS_ERR(rec));
3544         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3545         BUG_ON(!backref);
3546
3547         backref->errors |= errors;
3548
3549         if (item_type != BTRFS_DIR_ITEM_KEY) {
3550                 if (backref->found_dir_index || backref->found_back_ref ||
3551                     backref->found_forward_ref) {
3552                         if (backref->index != index)
3553                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3554                 } else {
3555                         backref->index = index;
3556                 }
3557         }
3558
3559         if (item_type == BTRFS_DIR_ITEM_KEY) {
3560                 if (backref->found_forward_ref)
3561                         rec->found_ref++;
3562                 backref->found_dir_item = 1;
3563         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3564                 backref->found_dir_index = 1;
3565         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3566                 if (backref->found_forward_ref)
3567                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3568                 else if (backref->found_dir_item)
3569                         rec->found_ref++;
3570                 backref->found_forward_ref = 1;
3571         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3572                 if (backref->found_back_ref)
3573                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3574                 backref->found_back_ref = 1;
3575         } else {
3576                 BUG_ON(1);
3577         }
3578
3579         if (backref->found_forward_ref && backref->found_dir_item)
3580                 backref->reachable = 1;
3581         return 0;
3582 }
3583
3584 static int merge_root_recs(struct btrfs_root *root,
3585                            struct cache_tree *src_cache,
3586                            struct cache_tree *dst_cache)
3587 {
3588         struct cache_extent *cache;
3589         struct ptr_node *node;
3590         struct inode_record *rec;
3591         struct inode_backref *backref;
3592         int ret = 0;
3593
3594         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3595                 free_inode_recs_tree(src_cache);
3596                 return 0;
3597         }
3598
3599         while (1) {
3600                 cache = search_cache_extent(src_cache, 0);
3601                 if (!cache)
3602                         break;
3603                 node = container_of(cache, struct ptr_node, cache);
3604                 rec = node->data;
3605                 remove_cache_extent(src_cache, &node->cache);
3606                 free(node);
3607
3608                 ret = is_child_root(root, root->objectid, rec->ino);
3609                 if (ret < 0)
3610                         break;
3611                 else if (ret == 0)
3612                         goto skip;
3613
3614                 list_for_each_entry(backref, &rec->backrefs, list) {
3615                         BUG_ON(backref->found_inode_ref);
3616                         if (backref->found_dir_item)
3617                                 add_root_backref(dst_cache, rec->ino,
3618                                         root->root_key.objectid, backref->dir,
3619                                         backref->index, backref->name,
3620                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3621                                         backref->errors);
3622                         if (backref->found_dir_index)
3623                                 add_root_backref(dst_cache, rec->ino,
3624                                         root->root_key.objectid, backref->dir,
3625                                         backref->index, backref->name,
3626                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3627                                         backref->errors);
3628                 }
3629 skip:
3630                 free_inode_rec(rec);
3631         }
3632         if (ret < 0)
3633                 return ret;
3634         return 0;
3635 }
3636
3637 static int check_root_refs(struct btrfs_root *root,
3638                            struct cache_tree *root_cache)
3639 {
3640         struct root_record *rec;
3641         struct root_record *ref_root;
3642         struct root_backref *backref;
3643         struct cache_extent *cache;
3644         int loop = 1;
3645         int ret;
3646         int error;
3647         int errors = 0;
3648
3649         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3650         BUG_ON(IS_ERR(rec));
3651         rec->found_ref = 1;
3652
3653         /* fixme: this can not detect circular references */
3654         while (loop) {
3655                 loop = 0;
3656                 cache = search_cache_extent(root_cache, 0);
3657                 while (1) {
3658                         if (!cache)
3659                                 break;
3660                         rec = container_of(cache, struct root_record, cache);
3661                         cache = next_cache_extent(cache);
3662
3663                         if (rec->found_ref == 0)
3664                                 continue;
3665
3666                         list_for_each_entry(backref, &rec->backrefs, list) {
3667                                 if (!backref->reachable)
3668                                         continue;
3669
3670                                 ref_root = get_root_rec(root_cache,
3671                                                         backref->ref_root);
3672                                 BUG_ON(IS_ERR(ref_root));
3673                                 if (ref_root->found_ref > 0)
3674                                         continue;
3675
3676                                 backref->reachable = 0;
3677                                 rec->found_ref--;
3678                                 if (rec->found_ref == 0)
3679                                         loop = 1;
3680                         }
3681                 }
3682         }
3683
3684         cache = search_cache_extent(root_cache, 0);
3685         while (1) {
3686                 if (!cache)
3687                         break;
3688                 rec = container_of(cache, struct root_record, cache);
3689                 cache = next_cache_extent(cache);
3690
3691                 if (rec->found_ref == 0 &&
3692                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3693                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3694                         ret = check_orphan_item(root->fs_info->tree_root,
3695                                                 rec->objectid);
3696                         if (ret == 0)
3697                                 continue;
3698
3699                         /*
3700                          * If we don't have a root item then we likely just have
3701                          * a dir item in a snapshot for this root but no actual
3702                          * ref key or anything so it's meaningless.
3703                          */
3704                         if (!rec->found_root_item)
3705                                 continue;
3706                         errors++;
3707                         fprintf(stderr, "fs tree %llu not referenced\n",
3708                                 (unsigned long long)rec->objectid);
3709                 }
3710
3711                 error = 0;
3712                 if (rec->found_ref > 0 && !rec->found_root_item)
3713                         error = 1;
3714                 list_for_each_entry(backref, &rec->backrefs, list) {
3715                         if (!backref->found_dir_item)
3716                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3717                         if (!backref->found_dir_index)
3718                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3719                         if (!backref->found_back_ref)
3720                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3721                         if (!backref->found_forward_ref)
3722                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3723                         if (backref->reachable && backref->errors)
3724                                 error = 1;
3725                 }
3726                 if (!error)
3727                         continue;
3728
3729                 errors++;
3730                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3731                         (unsigned long long)rec->objectid, rec->found_ref,
3732                          rec->found_root_item ? "" : "not found");
3733
3734                 list_for_each_entry(backref, &rec->backrefs, list) {
3735                         if (!backref->reachable)
3736                                 continue;
3737                         if (!backref->errors && rec->found_root_item)
3738                                 continue;
3739                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3740                                 " index %llu namelen %u name %s errors %x\n",
3741                                 (unsigned long long)backref->ref_root,
3742                                 (unsigned long long)backref->dir,
3743                                 (unsigned long long)backref->index,
3744                                 backref->namelen, backref->name,
3745                                 backref->errors);
3746                         print_ref_error(backref->errors);
3747                 }
3748         }
3749         return errors > 0 ? 1 : 0;
3750 }
3751
3752 static int process_root_ref(struct extent_buffer *eb, int slot,
3753                             struct btrfs_key *key,
3754                             struct cache_tree *root_cache)
3755 {
3756         u64 dirid;
3757         u64 index;
3758         u32 len;
3759         u32 name_len;
3760         struct btrfs_root_ref *ref;
3761         char namebuf[BTRFS_NAME_LEN];
3762         int error;
3763
3764         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3765
3766         dirid = btrfs_root_ref_dirid(eb, ref);
3767         index = btrfs_root_ref_sequence(eb, ref);
3768         name_len = btrfs_root_ref_name_len(eb, ref);
3769
3770         if (name_len <= BTRFS_NAME_LEN) {
3771                 len = name_len;
3772                 error = 0;
3773         } else {
3774                 len = BTRFS_NAME_LEN;
3775                 error = REF_ERR_NAME_TOO_LONG;
3776         }
3777         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3778
3779         if (key->type == BTRFS_ROOT_REF_KEY) {
3780                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3781                                  index, namebuf, len, key->type, error);
3782         } else {
3783                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3784                                  index, namebuf, len, key->type, error);
3785         }
3786         return 0;
3787 }
3788
3789 static void free_corrupt_block(struct cache_extent *cache)
3790 {
3791         struct btrfs_corrupt_block *corrupt;
3792
3793         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3794         free(corrupt);
3795 }
3796
3797 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3798
3799 /*
3800  * Repair the btree of the given root.
3801  *
3802  * The fix is to remove the node key in corrupt_blocks cache_tree.
3803  * and rebalance the tree.
3804  * After the fix, the btree should be writeable.
3805  */
3806 static int repair_btree(struct btrfs_root *root,
3807                         struct cache_tree *corrupt_blocks)
3808 {
3809         struct btrfs_trans_handle *trans;
3810         struct btrfs_path path;
3811         struct btrfs_corrupt_block *corrupt;
3812         struct cache_extent *cache;
3813         struct btrfs_key key;
3814         u64 offset;
3815         int level;
3816         int ret = 0;
3817
3818         if (cache_tree_empty(corrupt_blocks))
3819                 return 0;
3820
3821         trans = btrfs_start_transaction(root, 1);
3822         if (IS_ERR(trans)) {
3823                 ret = PTR_ERR(trans);
3824                 fprintf(stderr, "Error starting transaction: %s\n",
3825                         strerror(-ret));
3826                 return ret;
3827         }
3828         btrfs_init_path(&path);
3829         cache = first_cache_extent(corrupt_blocks);
3830         while (cache) {
3831                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3832                                        cache);
3833                 level = corrupt->level;
3834                 path.lowest_level = level;
3835                 key.objectid = corrupt->key.objectid;
3836                 key.type = corrupt->key.type;
3837                 key.offset = corrupt->key.offset;
3838
3839                 /*
3840                  * Here we don't want to do any tree balance, since it may
3841                  * cause a balance with corrupted brother leaf/node,
3842                  * so ins_len set to 0 here.
3843                  * Balance will be done after all corrupt node/leaf is deleted.
3844                  */
3845                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 offset = btrfs_node_blockptr(path.nodes[level],
3849                                              path.slots[level]);
3850
3851                 /* Remove the ptr */
3852                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3853                 if (ret < 0)
3854                         goto out;
3855                 /*
3856                  * Remove the corresponding extent
3857                  * return value is not concerned.
3858                  */
3859                 btrfs_release_path(&path);
3860                 ret = btrfs_free_extent(trans, root, offset,
3861                                 root->fs_info->nodesize, 0,
3862                                 root->root_key.objectid, level - 1, 0);
3863                 cache = next_cache_extent(cache);
3864         }
3865
3866         /* Balance the btree using btrfs_search_slot() */
3867         cache = first_cache_extent(corrupt_blocks);
3868         while (cache) {
3869                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3870                                        cache);
3871                 memcpy(&key, &corrupt->key, sizeof(key));
3872                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3873                 if (ret < 0)
3874                         goto out;
3875                 /* return will always >0 since it won't find the item */
3876                 ret = 0;
3877                 btrfs_release_path(&path);
3878                 cache = next_cache_extent(cache);
3879         }
3880 out:
3881         btrfs_commit_transaction(trans, root);
3882         btrfs_release_path(&path);
3883         return ret;
3884 }
3885
3886 static int check_fs_root(struct btrfs_root *root,
3887                          struct cache_tree *root_cache,
3888                          struct walk_control *wc)
3889 {
3890         int ret = 0;
3891         int err = 0;
3892         int wret;
3893         int level;
3894         struct btrfs_path path;
3895         struct shared_node root_node;
3896         struct root_record *rec;
3897         struct btrfs_root_item *root_item = &root->root_item;
3898         struct cache_tree corrupt_blocks;
3899         struct orphan_data_extent *orphan;
3900         struct orphan_data_extent *tmp;
3901         enum btrfs_tree_block_status status;
3902         struct node_refs nrefs;
3903
3904         /*
3905          * Reuse the corrupt_block cache tree to record corrupted tree block
3906          *
3907          * Unlike the usage in extent tree check, here we do it in a per
3908          * fs/subvol tree base.
3909          */
3910         cache_tree_init(&corrupt_blocks);
3911         root->fs_info->corrupt_blocks = &corrupt_blocks;
3912
3913         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3914                 rec = get_root_rec(root_cache, root->root_key.objectid);
3915                 BUG_ON(IS_ERR(rec));
3916                 if (btrfs_root_refs(root_item) > 0)
3917                         rec->found_root_item = 1;
3918         }
3919
3920         btrfs_init_path(&path);
3921         memset(&root_node, 0, sizeof(root_node));
3922         cache_tree_init(&root_node.root_cache);
3923         cache_tree_init(&root_node.inode_cache);
3924         memset(&nrefs, 0, sizeof(nrefs));
3925
3926         /* Move the orphan extent record to corresponding inode_record */
3927         list_for_each_entry_safe(orphan, tmp,
3928                                  &root->orphan_data_extents, list) {
3929                 struct inode_record *inode;
3930
3931                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3932                                       1);
3933                 BUG_ON(IS_ERR(inode));
3934                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3935                 list_move(&orphan->list, &inode->orphan_extents);
3936         }
3937
3938         level = btrfs_header_level(root->node);
3939         memset(wc->nodes, 0, sizeof(wc->nodes));
3940         wc->nodes[level] = &root_node;
3941         wc->active_node = level;
3942         wc->root_level = level;
3943
3944         /* We may not have checked the root block, lets do that now */
3945         if (btrfs_is_leaf(root->node))
3946                 status = btrfs_check_leaf(root, NULL, root->node);
3947         else
3948                 status = btrfs_check_node(root, NULL, root->node);
3949         if (status != BTRFS_TREE_BLOCK_CLEAN)
3950                 return -EIO;
3951
3952         if (btrfs_root_refs(root_item) > 0 ||
3953             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3954                 path.nodes[level] = root->node;
3955                 extent_buffer_get(root->node);
3956                 path.slots[level] = 0;
3957         } else {
3958                 struct btrfs_key key;
3959                 struct btrfs_disk_key found_key;
3960
3961                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3962                 level = root_item->drop_level;
3963                 path.lowest_level = level;
3964                 if (level > btrfs_header_level(root->node) ||
3965                     level >= BTRFS_MAX_LEVEL) {
3966                         error("ignoring invalid drop level: %u", level);
3967                         goto skip_walking;
3968                 }
3969                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3970                 if (wret < 0)
3971                         goto skip_walking;
3972                 btrfs_node_key(path.nodes[level], &found_key,
3973                                 path.slots[level]);
3974                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3975                                         sizeof(found_key)));
3976         }
3977
3978         while (1) {
3979                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3980                 if (wret < 0)
3981                         ret = wret;
3982                 if (wret != 0)
3983                         break;
3984
3985                 wret = walk_up_tree(root, &path, wc, &level);
3986                 if (wret < 0)
3987                         ret = wret;
3988                 if (wret != 0)
3989                         break;
3990         }
3991 skip_walking:
3992         btrfs_release_path(&path);
3993
3994         if (!cache_tree_empty(&corrupt_blocks)) {
3995                 struct cache_extent *cache;
3996                 struct btrfs_corrupt_block *corrupt;
3997
3998                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3999                        root->root_key.objectid);
4000                 cache = first_cache_extent(&corrupt_blocks);
4001                 while (cache) {
4002                         corrupt = container_of(cache,
4003                                                struct btrfs_corrupt_block,
4004                                                cache);
4005                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4006                                cache->start, corrupt->level,
4007                                corrupt->key.objectid, corrupt->key.type,
4008                                corrupt->key.offset);
4009                         cache = next_cache_extent(cache);
4010                 }
4011                 if (repair) {
4012                         printf("Try to repair the btree for root %llu\n",
4013                                root->root_key.objectid);
4014                         ret = repair_btree(root, &corrupt_blocks);
4015                         if (ret < 0)
4016                                 fprintf(stderr, "Failed to repair btree: %s\n",
4017                                         strerror(-ret));
4018                         if (!ret)
4019                                 printf("Btree for root %llu is fixed\n",
4020                                        root->root_key.objectid);
4021                 }
4022         }
4023
4024         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4025         if (err < 0)
4026                 ret = err;
4027
4028         if (root_node.current) {
4029                 root_node.current->checked = 1;
4030                 maybe_free_inode_rec(&root_node.inode_cache,
4031                                 root_node.current);
4032         }
4033
4034         err = check_inode_recs(root, &root_node.inode_cache);
4035         if (!ret)
4036                 ret = err;
4037
4038         free_corrupt_blocks_tree(&corrupt_blocks);
4039         root->fs_info->corrupt_blocks = NULL;
4040         free_orphan_data_extents(&root->orphan_data_extents);
4041         return ret;
4042 }
4043
4044 static int fs_root_objectid(u64 objectid)
4045 {
4046         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4047             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4048                 return 1;
4049         return is_fstree(objectid);
4050 }
4051
4052 static int check_fs_roots(struct btrfs_root *root,
4053                           struct cache_tree *root_cache)
4054 {
4055         struct btrfs_path path;
4056         struct btrfs_key key;
4057         struct walk_control wc;
4058         struct extent_buffer *leaf, *tree_node;
4059         struct btrfs_root *tmp_root;
4060         struct btrfs_root *tree_root = root->fs_info->tree_root;
4061         int ret;
4062         int err = 0;
4063
4064         if (ctx.progress_enabled) {
4065                 ctx.tp = TASK_FS_ROOTS;
4066                 task_start(ctx.info);
4067         }
4068
4069         /*
4070          * Just in case we made any changes to the extent tree that weren't
4071          * reflected into the free space cache yet.
4072          */
4073         if (repair)
4074                 reset_cached_block_groups(root->fs_info);
4075         memset(&wc, 0, sizeof(wc));
4076         cache_tree_init(&wc.shared);
4077         btrfs_init_path(&path);
4078
4079 again:
4080         key.offset = 0;
4081         key.objectid = 0;
4082         key.type = BTRFS_ROOT_ITEM_KEY;
4083         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4084         if (ret < 0) {
4085                 err = 1;
4086                 goto out;
4087         }
4088         tree_node = tree_root->node;
4089         while (1) {
4090                 if (tree_node != tree_root->node) {
4091                         free_root_recs_tree(root_cache);
4092                         btrfs_release_path(&path);
4093                         goto again;
4094                 }
4095                 leaf = path.nodes[0];
4096                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4097                         ret = btrfs_next_leaf(tree_root, &path);
4098                         if (ret) {
4099                                 if (ret < 0)
4100                                         err = 1;
4101                                 break;
4102                         }
4103                         leaf = path.nodes[0];
4104                 }
4105                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4106                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4107                     fs_root_objectid(key.objectid)) {
4108                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4109                                 tmp_root = btrfs_read_fs_root_no_cache(
4110                                                 root->fs_info, &key);
4111                         } else {
4112                                 key.offset = (u64)-1;
4113                                 tmp_root = btrfs_read_fs_root(
4114                                                 root->fs_info, &key);
4115                         }
4116                         if (IS_ERR(tmp_root)) {
4117                                 err = 1;
4118                                 goto next;
4119                         }
4120                         ret = check_fs_root(tmp_root, root_cache, &wc);
4121                         if (ret == -EAGAIN) {
4122                                 free_root_recs_tree(root_cache);
4123                                 btrfs_release_path(&path);
4124                                 goto again;
4125                         }
4126                         if (ret)
4127                                 err = 1;
4128                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4129                                 btrfs_free_fs_root(tmp_root);
4130                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4131                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4132                         process_root_ref(leaf, path.slots[0], &key,
4133                                          root_cache);
4134                 }
4135 next:
4136                 path.slots[0]++;
4137         }
4138 out:
4139         btrfs_release_path(&path);
4140         if (err)
4141                 free_extent_cache_tree(&wc.shared);
4142         if (!cache_tree_empty(&wc.shared))
4143                 fprintf(stderr, "warning line %d\n", __LINE__);
4144
4145         task_stop(ctx.info);
4146
4147         return err;
4148 }
4149
4150 /*
4151  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4152  * INODE_REF/INODE_EXTREF match.
4153  *
4154  * @root:       the root of the fs/file tree
4155  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4156  * @key:        the key of the DIR_ITEM/DIR_INDEX
4157  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4158  *              distinguish root_dir between normal dir/file
4159  * @name:       the name in the INODE_REF/INODE_EXTREF
4160  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4161  * @mode:       the st_mode of INODE_ITEM
4162  *
4163  * Return 0 if no error occurred.
4164  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4165  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4166  * dir/file.
4167  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4168  * not match for normal dir/file.
4169  */
4170 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4171                          struct btrfs_key *key, u64 index, char *name,
4172                          u32 namelen, u32 mode)
4173 {
4174         struct btrfs_path path;
4175         struct extent_buffer *node;
4176         struct btrfs_dir_item *di;
4177         struct btrfs_key location;
4178         char namebuf[BTRFS_NAME_LEN] = {0};
4179         u32 total;
4180         u32 cur = 0;
4181         u32 len;
4182         u32 name_len;
4183         u32 data_len;
4184         u8 filetype;
4185         int slot;
4186         int ret;
4187
4188         btrfs_init_path(&path);
4189         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4190         if (ret < 0) {
4191                 ret = DIR_ITEM_MISSING;
4192                 goto out;
4193         }
4194
4195         /* Process root dir and goto out*/
4196         if (index == 0) {
4197                 if (ret == 0) {
4198                         ret = ROOT_DIR_ERROR;
4199                         error(
4200                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4201                                 root->objectid,
4202                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4203                                         "REF" : "EXTREF",
4204                                 ref_key->objectid, ref_key->offset,
4205                                 key->type == BTRFS_DIR_ITEM_KEY ?
4206                                         "DIR_ITEM" : "DIR_INDEX");
4207                 } else {
4208                         ret = 0;
4209                 }
4210
4211                 goto out;
4212         }
4213
4214         /* Process normal file/dir */
4215         if (ret > 0) {
4216                 ret = DIR_ITEM_MISSING;
4217                 error(
4218                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4219                         root->objectid,
4220                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4221                         ref_key->objectid, ref_key->offset,
4222                         key->type == BTRFS_DIR_ITEM_KEY ?
4223                                 "DIR_ITEM" : "DIR_INDEX",
4224                         key->objectid, key->offset, namelen, name,
4225                         imode_to_type(mode));
4226                 goto out;
4227         }
4228
4229         /* Check whether inode_id/filetype/name match */
4230         node = path.nodes[0];
4231         slot = path.slots[0];
4232         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4233         total = btrfs_item_size_nr(node, slot);
4234         while (cur < total) {
4235                 ret = DIR_ITEM_MISMATCH;
4236                 name_len = btrfs_dir_name_len(node, di);
4237                 data_len = btrfs_dir_data_len(node, di);
4238
4239                 btrfs_dir_item_key_to_cpu(node, di, &location);
4240                 if (location.objectid != ref_key->objectid ||
4241                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4242                     location.offset != 0)
4243                         goto next;
4244
4245                 filetype = btrfs_dir_type(node, di);
4246                 if (imode_to_type(mode) != filetype)
4247                         goto next;
4248
4249                 if (cur + sizeof(*di) + name_len > total ||
4250                     name_len > BTRFS_NAME_LEN) {
4251                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4252                                 root->objectid,
4253                                 key->type == BTRFS_DIR_ITEM_KEY ?
4254                                 "DIR_ITEM" : "DIR_INDEX",
4255                                 key->objectid, key->offset, name_len);
4256
4257                         if (cur + sizeof(*di) > total)
4258                                 break;
4259                         len = min_t(u32, total - cur - sizeof(*di),
4260                                     BTRFS_NAME_LEN);
4261                 } else {
4262                         len = name_len;
4263                 }
4264
4265                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4266                 if (len != namelen || strncmp(namebuf, name, len))
4267                         goto next;
4268
4269                 ret = 0;
4270                 goto out;
4271 next:
4272                 len = sizeof(*di) + name_len + data_len;
4273                 di = (struct btrfs_dir_item *)((char *)di + len);
4274                 cur += len;
4275         }
4276         if (ret == DIR_ITEM_MISMATCH)
4277                 error(
4278                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4279                         root->objectid,
4280                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4281                         ref_key->objectid, ref_key->offset,
4282                         key->type == BTRFS_DIR_ITEM_KEY ?
4283                                 "DIR_ITEM" : "DIR_INDEX",
4284                         key->objectid, key->offset, namelen, name,
4285                         imode_to_type(mode));
4286 out:
4287         btrfs_release_path(&path);
4288         return ret;
4289 }
4290
4291 /*
4292  * Traverse the given INODE_REF and call find_dir_item() to find related
4293  * DIR_ITEM/DIR_INDEX.
4294  *
4295  * @root:       the root of the fs/file tree
4296  * @ref_key:    the key of the INODE_REF
4297  * @refs:       the count of INODE_REF
4298  * @mode:       the st_mode of INODE_ITEM
4299  *
4300  * Return 0 if no error occurred.
4301  */
4302 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4303                            struct extent_buffer *node, int slot, u64 *refs,
4304                            int mode)
4305 {
4306         struct btrfs_key key;
4307         struct btrfs_inode_ref *ref;
4308         char namebuf[BTRFS_NAME_LEN] = {0};
4309         u32 total;
4310         u32 cur = 0;
4311         u32 len;
4312         u32 name_len;
4313         u64 index;
4314         int ret, err = 0;
4315
4316         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4317         total = btrfs_item_size_nr(node, slot);
4318
4319 next:
4320         /* Update inode ref count */
4321         (*refs)++;
4322
4323         index = btrfs_inode_ref_index(node, ref);
4324         name_len = btrfs_inode_ref_name_len(node, ref);
4325         if (cur + sizeof(*ref) + name_len > total ||
4326             name_len > BTRFS_NAME_LEN) {
4327                 warning("root %llu INODE_REF[%llu %llu] name too long",
4328                         root->objectid, ref_key->objectid, ref_key->offset);
4329
4330                 if (total < cur + sizeof(*ref))
4331                         goto out;
4332                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4333         } else {
4334                 len = name_len;
4335         }
4336
4337         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4338
4339         /* Check root dir ref name */
4340         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4341                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4342                       root->objectid, ref_key->objectid, ref_key->offset,
4343                       namebuf);
4344                 err |= ROOT_DIR_ERROR;
4345         }
4346
4347         /* Find related DIR_INDEX */
4348         key.objectid = ref_key->offset;
4349         key.type = BTRFS_DIR_INDEX_KEY;
4350         key.offset = index;
4351         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4352         err |= ret;
4353
4354         /* Find related dir_item */
4355         key.objectid = ref_key->offset;
4356         key.type = BTRFS_DIR_ITEM_KEY;
4357         key.offset = btrfs_name_hash(namebuf, len);
4358         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4359         err |= ret;
4360
4361         len = sizeof(*ref) + name_len;
4362         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4363         cur += len;
4364         if (cur < total)
4365                 goto next;
4366
4367 out:
4368         return err;
4369 }
4370
4371 /*
4372  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4373  * DIR_ITEM/DIR_INDEX.
4374  *
4375  * @root:       the root of the fs/file tree
4376  * @ref_key:    the key of the INODE_EXTREF
4377  * @refs:       the count of INODE_EXTREF
4378  * @mode:       the st_mode of INODE_ITEM
4379  *
4380  * Return 0 if no error occurred.
4381  */
4382 static int check_inode_extref(struct btrfs_root *root,
4383                               struct btrfs_key *ref_key,
4384                               struct extent_buffer *node, int slot, u64 *refs,
4385                               int mode)
4386 {
4387         struct btrfs_key key;
4388         struct btrfs_inode_extref *extref;
4389         char namebuf[BTRFS_NAME_LEN] = {0};
4390         u32 total;
4391         u32 cur = 0;
4392         u32 len;
4393         u32 name_len;
4394         u64 index;
4395         u64 parent;
4396         int ret;
4397         int err = 0;
4398
4399         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4400         total = btrfs_item_size_nr(node, slot);
4401
4402 next:
4403         /* update inode ref count */
4404         (*refs)++;
4405         name_len = btrfs_inode_extref_name_len(node, extref);
4406         index = btrfs_inode_extref_index(node, extref);
4407         parent = btrfs_inode_extref_parent(node, extref);
4408         if (name_len <= BTRFS_NAME_LEN) {
4409                 len = name_len;
4410         } else {
4411                 len = BTRFS_NAME_LEN;
4412                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4413                         root->objectid, ref_key->objectid, ref_key->offset);
4414         }
4415         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4416
4417         /* Check root dir ref name */
4418         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4419                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4420                       root->objectid, ref_key->objectid, ref_key->offset,
4421                       namebuf);
4422                 err |= ROOT_DIR_ERROR;
4423         }
4424
4425         /* find related dir_index */
4426         key.objectid = parent;
4427         key.type = BTRFS_DIR_INDEX_KEY;
4428         key.offset = index;
4429         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4430         err |= ret;
4431
4432         /* find related dir_item */
4433         key.objectid = parent;
4434         key.type = BTRFS_DIR_ITEM_KEY;
4435         key.offset = btrfs_name_hash(namebuf, len);
4436         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4437         err |= ret;
4438
4439         len = sizeof(*extref) + name_len;
4440         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4441         cur += len;
4442
4443         if (cur < total)
4444                 goto next;
4445
4446         return err;
4447 }
4448
4449 /*
4450  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4451  * DIR_ITEM/DIR_INDEX match.
4452  *
4453  * @root:       the root of the fs/file tree
4454  * @key:        the key of the INODE_REF/INODE_EXTREF
4455  * @name:       the name in the INODE_REF/INODE_EXTREF
4456  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4457  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4458  * to (u64)-1
4459  * @ext_ref:    the EXTENDED_IREF feature
4460  *
4461  * Return 0 if no error occurred.
4462  * Return >0 for error bitmap
4463  */
4464 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4465                           char *name, int namelen, u64 index,
4466                           unsigned int ext_ref)
4467 {
4468         struct btrfs_path path;
4469         struct btrfs_inode_ref *ref;
4470         struct btrfs_inode_extref *extref;
4471         struct extent_buffer *node;
4472         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4473         u32 total;
4474         u32 cur = 0;
4475         u32 len;
4476         u32 ref_namelen;
4477         u64 ref_index;
4478         u64 parent;
4479         u64 dir_id;
4480         int slot;
4481         int ret;
4482
4483         btrfs_init_path(&path);
4484         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4485         if (ret) {
4486                 ret = INODE_REF_MISSING;
4487                 goto extref;
4488         }
4489
4490         node = path.nodes[0];
4491         slot = path.slots[0];
4492
4493         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4494         total = btrfs_item_size_nr(node, slot);
4495
4496         /* Iterate all entry of INODE_REF */
4497         while (cur < total) {
4498                 ret = INODE_REF_MISSING;
4499
4500                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4501                 ref_index = btrfs_inode_ref_index(node, ref);
4502                 if (index != (u64)-1 && index != ref_index)
4503                         goto next_ref;
4504
4505                 if (cur + sizeof(*ref) + ref_namelen > total ||
4506                     ref_namelen > BTRFS_NAME_LEN) {
4507                         warning("root %llu INODE %s[%llu %llu] name too long",
4508                                 root->objectid,
4509                                 key->type == BTRFS_INODE_REF_KEY ?
4510                                         "REF" : "EXTREF",
4511                                 key->objectid, key->offset);
4512
4513                         if (cur + sizeof(*ref) > total)
4514                                 break;
4515                         len = min_t(u32, total - cur - sizeof(*ref),
4516                                     BTRFS_NAME_LEN);
4517                 } else {
4518                         len = ref_namelen;
4519                 }
4520
4521                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4522                                    len);
4523
4524                 if (len != namelen || strncmp(ref_namebuf, name, len))
4525                         goto next_ref;
4526
4527                 ret = 0;
4528                 goto out;
4529 next_ref:
4530                 len = sizeof(*ref) + ref_namelen;
4531                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4532                 cur += len;
4533         }
4534
4535 extref:
4536         /* Skip if not support EXTENDED_IREF feature */
4537         if (!ext_ref)
4538                 goto out;
4539
4540         btrfs_release_path(&path);
4541         btrfs_init_path(&path);
4542
4543         dir_id = key->offset;
4544         key->type = BTRFS_INODE_EXTREF_KEY;
4545         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4546
4547         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4548         if (ret) {
4549                 ret = INODE_REF_MISSING;
4550                 goto out;
4551         }
4552
4553         node = path.nodes[0];
4554         slot = path.slots[0];
4555
4556         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4557         cur = 0;
4558         total = btrfs_item_size_nr(node, slot);
4559
4560         /* Iterate all entry of INODE_EXTREF */
4561         while (cur < total) {
4562                 ret = INODE_REF_MISSING;
4563
4564                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4565                 ref_index = btrfs_inode_extref_index(node, extref);
4566                 parent = btrfs_inode_extref_parent(node, extref);
4567                 if (index != (u64)-1 && index != ref_index)
4568                         goto next_extref;
4569
4570                 if (parent != dir_id)
4571                         goto next_extref;
4572
4573                 if (ref_namelen <= BTRFS_NAME_LEN) {
4574                         len = ref_namelen;
4575                 } else {
4576                         len = BTRFS_NAME_LEN;
4577                         warning("root %llu INODE %s[%llu %llu] name too long",
4578                                 root->objectid,
4579                                 key->type == BTRFS_INODE_REF_KEY ?
4580                                         "REF" : "EXTREF",
4581                                 key->objectid, key->offset);
4582                 }
4583                 read_extent_buffer(node, ref_namebuf,
4584                                    (unsigned long)(extref + 1), len);
4585
4586                 if (len != namelen || strncmp(ref_namebuf, name, len))
4587                         goto next_extref;
4588
4589                 ret = 0;
4590                 goto out;
4591
4592 next_extref:
4593                 len = sizeof(*extref) + ref_namelen;
4594                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4595                 cur += len;
4596
4597         }
4598 out:
4599         btrfs_release_path(&path);
4600         return ret;
4601 }
4602
4603 /*
4604  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4605  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4606  *
4607  * @root:       the root of the fs/file tree
4608  * @key:        the key of the DIR_ITEM/DIR_INDEX
4609  * @size:       the st_size of the INODE_ITEM
4610  * @ext_ref:    the EXTENDED_IREF feature
4611  *
4612  * Return 0 if no error occurred.
4613  */
4614 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4615                           struct extent_buffer *node, int slot, u64 *size,
4616                           unsigned int ext_ref)
4617 {
4618         struct btrfs_dir_item *di;
4619         struct btrfs_inode_item *ii;
4620         struct btrfs_path path;
4621         struct btrfs_key location;
4622         char namebuf[BTRFS_NAME_LEN] = {0};
4623         u32 total;
4624         u32 cur = 0;
4625         u32 len;
4626         u32 name_len;
4627         u32 data_len;
4628         u8 filetype;
4629         u32 mode;
4630         u64 index;
4631         int ret;
4632         int err = 0;
4633
4634         /*
4635          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4636          * ignore index check.
4637          */
4638         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4639
4640         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4641         total = btrfs_item_size_nr(node, slot);
4642
4643         while (cur < total) {
4644                 data_len = btrfs_dir_data_len(node, di);
4645                 if (data_len)
4646                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4647                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4648                               "DIR_ITEM" : "DIR_INDEX",
4649                               key->objectid, key->offset, data_len);
4650
4651                 name_len = btrfs_dir_name_len(node, di);
4652                 if (cur + sizeof(*di) + name_len > total ||
4653                     name_len > BTRFS_NAME_LEN) {
4654                         warning("root %llu %s[%llu %llu] name too long",
4655                                 root->objectid,
4656                                 key->type == BTRFS_DIR_ITEM_KEY ?
4657                                 "DIR_ITEM" : "DIR_INDEX",
4658                                 key->objectid, key->offset);
4659
4660                         if (cur + sizeof(*di) > total)
4661                                 break;
4662                         len = min_t(u32, total - cur - sizeof(*di),
4663                                     BTRFS_NAME_LEN);
4664                 } else {
4665                         len = name_len;
4666                 }
4667                 (*size) += name_len;
4668
4669                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4670                 filetype = btrfs_dir_type(node, di);
4671
4672                 btrfs_init_path(&path);
4673                 btrfs_dir_item_key_to_cpu(node, di, &location);
4674
4675                 /* Ignore related ROOT_ITEM check */
4676                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4677                         goto next;
4678
4679                 /* Check relative INODE_ITEM(existence/filetype) */
4680                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4681                 if (ret) {
4682                         err |= INODE_ITEM_MISSING;
4683                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4684                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4685                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4686                               key->offset, location.objectid, name_len,
4687                               namebuf, filetype);
4688                         goto next;
4689                 }
4690
4691                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4692                                     struct btrfs_inode_item);
4693                 mode = btrfs_inode_mode(path.nodes[0], ii);
4694
4695                 if (imode_to_type(mode) != filetype) {
4696                         err |= INODE_ITEM_MISMATCH;
4697                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4698                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4699                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4700                               key->offset, name_len, namebuf, filetype);
4701                 }
4702
4703                 /* Check relative INODE_REF/INODE_EXTREF */
4704                 location.type = BTRFS_INODE_REF_KEY;
4705                 location.offset = key->objectid;
4706                 ret = find_inode_ref(root, &location, namebuf, len,
4707                                        index, ext_ref);
4708                 err |= ret;
4709                 if (ret & INODE_REF_MISSING)
4710                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4711                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4712                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4713                               key->offset, name_len, namebuf, filetype);
4714
4715 next:
4716                 btrfs_release_path(&path);
4717                 len = sizeof(*di) + name_len + data_len;
4718                 di = (struct btrfs_dir_item *)((char *)di + len);
4719                 cur += len;
4720
4721                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4722                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4723                               root->objectid, key->objectid, key->offset);
4724                         break;
4725                 }
4726         }
4727
4728         return err;
4729 }
4730
4731 /*
4732  * Check file extent datasum/hole, update the size of the file extents,
4733  * check and update the last offset of the file extent.
4734  *
4735  * @root:       the root of fs/file tree.
4736  * @fkey:       the key of the file extent.
4737  * @nodatasum:  INODE_NODATASUM feature.
4738  * @size:       the sum of all EXTENT_DATA items size for this inode.
4739  * @end:        the offset of the last extent.
4740  *
4741  * Return 0 if no error occurred.
4742  */
4743 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4744                              struct extent_buffer *node, int slot,
4745                              unsigned int nodatasum, u64 *size, u64 *end)
4746 {
4747         struct btrfs_file_extent_item *fi;
4748         u64 disk_bytenr;
4749         u64 disk_num_bytes;
4750         u64 extent_num_bytes;
4751         u64 extent_offset;
4752         u64 csum_found;         /* In byte size, sectorsize aligned */
4753         u64 search_start;       /* Logical range start we search for csum */
4754         u64 search_len;         /* Logical range len we search for csum */
4755         unsigned int extent_type;
4756         unsigned int is_hole;
4757         int compressed = 0;
4758         int ret;
4759         int err = 0;
4760
4761         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4762
4763         /* Check inline extent */
4764         extent_type = btrfs_file_extent_type(node, fi);
4765         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4766                 struct btrfs_item *e = btrfs_item_nr(slot);
4767                 u32 item_inline_len;
4768
4769                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4770                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4771                 compressed = btrfs_file_extent_compression(node, fi);
4772                 if (extent_num_bytes == 0) {
4773                         error(
4774                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4775                                 root->objectid, fkey->objectid, fkey->offset);
4776                         err |= FILE_EXTENT_ERROR;
4777                 }
4778                 if (!compressed && extent_num_bytes != item_inline_len) {
4779                         error(
4780                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4781                                 root->objectid, fkey->objectid, fkey->offset,
4782                                 extent_num_bytes, item_inline_len);
4783                         err |= FILE_EXTENT_ERROR;
4784                 }
4785                 *end += extent_num_bytes;
4786                 *size += extent_num_bytes;
4787                 return err;
4788         }
4789
4790         /* Check extent type */
4791         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4792                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4793                 err |= FILE_EXTENT_ERROR;
4794                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4795                       root->objectid, fkey->objectid, fkey->offset);
4796                 return err;
4797         }
4798
4799         /* Check REG_EXTENT/PREALLOC_EXTENT */
4800         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4801         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4802         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4803         extent_offset = btrfs_file_extent_offset(node, fi);
4804         compressed = btrfs_file_extent_compression(node, fi);
4805         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4806
4807         /*
4808          * Check EXTENT_DATA csum
4809          *
4810          * For plain (uncompressed) extent, we should only check the range
4811          * we're referring to, as it's possible that part of prealloc extent
4812          * has been written, and has csum:
4813          *
4814          * |<--- Original large preallocated extent A ---->|
4815          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4816          *      No csum                         Has csum
4817          *
4818          * For compressed extent, we should check the whole range.
4819          */
4820         if (!compressed) {
4821                 search_start = disk_bytenr + extent_offset;
4822                 search_len = extent_num_bytes;
4823         } else {
4824                 search_start = disk_bytenr;
4825                 search_len = disk_num_bytes;
4826         }
4827         ret = count_csum_range(root, search_start, search_len, &csum_found);
4828         if (csum_found > 0 && nodatasum) {
4829                 err |= ODD_CSUM_ITEM;
4830                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4831                       root->objectid, fkey->objectid, fkey->offset);
4832         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4833                    !is_hole && (ret < 0 || csum_found < search_len)) {
4834                 err |= CSUM_ITEM_MISSING;
4835                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4836                       root->objectid, fkey->objectid, fkey->offset,
4837                       csum_found, search_len);
4838         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4839                 err |= ODD_CSUM_ITEM;
4840                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4841                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4842         }
4843
4844         /* Check EXTENT_DATA hole */
4845         if (!no_holes && *end != fkey->offset) {
4846                 err |= FILE_EXTENT_ERROR;
4847                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4848                       root->objectid, fkey->objectid, fkey->offset);
4849         }
4850
4851         *end += extent_num_bytes;
4852         if (!is_hole)
4853                 *size += extent_num_bytes;
4854
4855         return err;
4856 }
4857
4858 /*
4859  * Check INODE_ITEM and related ITEMs (the same inode number)
4860  * 1. check link count
4861  * 2. check inode ref/extref
4862  * 3. check dir item/index
4863  *
4864  * @ext_ref:    the EXTENDED_IREF feature
4865  *
4866  * Return 0 if no error occurred.
4867  * Return >0 for error or hit the traversal is done(by error bitmap)
4868  */
4869 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4870                             unsigned int ext_ref)
4871 {
4872         struct extent_buffer *node;
4873         struct btrfs_inode_item *ii;
4874         struct btrfs_key key;
4875         u64 inode_id;
4876         u32 mode;
4877         u64 nlink;
4878         u64 nbytes;
4879         u64 isize;
4880         u64 size = 0;
4881         u64 refs = 0;
4882         u64 extent_end = 0;
4883         u64 extent_size = 0;
4884         unsigned int dir;
4885         unsigned int nodatasum;
4886         int slot;
4887         int ret;
4888         int err = 0;
4889
4890         node = path->nodes[0];
4891         slot = path->slots[0];
4892
4893         btrfs_item_key_to_cpu(node, &key, slot);
4894         inode_id = key.objectid;
4895
4896         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4897                 ret = btrfs_next_item(root, path);
4898                 if (ret > 0)
4899                         err |= LAST_ITEM;
4900                 return err;
4901         }
4902
4903         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4904         isize = btrfs_inode_size(node, ii);
4905         nbytes = btrfs_inode_nbytes(node, ii);
4906         mode = btrfs_inode_mode(node, ii);
4907         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4908         nlink = btrfs_inode_nlink(node, ii);
4909         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4910
4911         while (1) {
4912                 ret = btrfs_next_item(root, path);
4913                 if (ret < 0) {
4914                         /* out will fill 'err' rusing current statistics */
4915                         goto out;
4916                 } else if (ret > 0) {
4917                         err |= LAST_ITEM;
4918                         goto out;
4919                 }
4920
4921                 node = path->nodes[0];
4922                 slot = path->slots[0];
4923                 btrfs_item_key_to_cpu(node, &key, slot);
4924                 if (key.objectid != inode_id)
4925                         goto out;
4926
4927                 switch (key.type) {
4928                 case BTRFS_INODE_REF_KEY:
4929                         ret = check_inode_ref(root, &key, node, slot, &refs,
4930                                               mode);
4931                         err |= ret;
4932                         break;
4933                 case BTRFS_INODE_EXTREF_KEY:
4934                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4935                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4936                                         root->objectid, key.objectid,
4937                                         key.offset);
4938                         ret = check_inode_extref(root, &key, node, slot, &refs,
4939                                                  mode);
4940                         err |= ret;
4941                         break;
4942                 case BTRFS_DIR_ITEM_KEY:
4943                 case BTRFS_DIR_INDEX_KEY:
4944                         if (!dir) {
4945                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4946                                         root->objectid, inode_id,
4947                                         imode_to_type(mode), key.objectid,
4948                                         key.offset);
4949                         }
4950                         ret = check_dir_item(root, &key, node, slot, &size,
4951                                              ext_ref);
4952                         err |= ret;
4953                         break;
4954                 case BTRFS_EXTENT_DATA_KEY:
4955                         if (dir) {
4956                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4957                                         root->objectid, inode_id, key.objectid,
4958                                         key.offset);
4959                         }
4960                         ret = check_file_extent(root, &key, node, slot,
4961                                                 nodatasum, &extent_size,
4962                                                 &extent_end);
4963                         err |= ret;
4964                         break;
4965                 case BTRFS_XATTR_ITEM_KEY:
4966                         break;
4967                 default:
4968                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4969                               key.objectid, key.type, key.offset);
4970                 }
4971         }
4972
4973 out:
4974         /* verify INODE_ITEM nlink/isize/nbytes */
4975         if (dir) {
4976                 if (nlink != 1) {
4977                         err |= LINK_COUNT_ERROR;
4978                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4979                               root->objectid, inode_id, nlink);
4980                 }
4981
4982                 /*
4983                  * Just a warning, as dir inode nbytes is just an
4984                  * instructive value.
4985                  */
4986                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4987                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4988                                 root->objectid, inode_id,
4989                                 root->fs_info->nodesize);
4990                 }
4991
4992                 if (isize != size) {
4993                         err |= ISIZE_ERROR;
4994                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4995                               root->objectid, inode_id, isize, size);
4996                 }
4997         } else {
4998                 if (nlink != refs) {
4999                         err |= LINK_COUNT_ERROR;
5000                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5001                               root->objectid, inode_id, nlink, refs);
5002                 } else if (!nlink) {
5003                         err |= ORPHAN_ITEM;
5004                 }
5005
5006                 if (!nbytes && !no_holes && extent_end < isize) {
5007                         err |= NBYTES_ERROR;
5008                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5009                               root->objectid, inode_id, isize);
5010                 }
5011
5012                 if (nbytes != extent_size) {
5013                         err |= NBYTES_ERROR;
5014                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5015                               root->objectid, inode_id, nbytes, extent_size);
5016                 }
5017         }
5018
5019         return err;
5020 }
5021
5022 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5023 {
5024         struct btrfs_path path;
5025         struct btrfs_key key;
5026         int err = 0;
5027         int ret;
5028
5029         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5030         key.type = BTRFS_INODE_ITEM_KEY;
5031         key.offset = 0;
5032
5033         /* For root being dropped, we don't need to check first inode */
5034         if (btrfs_root_refs(&root->root_item) == 0 &&
5035             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5036             key.objectid)
5037                 return 0;
5038
5039         btrfs_init_path(&path);
5040
5041         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5042         if (ret < 0)
5043                 goto out;
5044         if (ret > 0) {
5045                 ret = 0;
5046                 err |= INODE_ITEM_MISSING;
5047                 error("first inode item of root %llu is missing",
5048                       root->objectid);
5049         }
5050
5051         err |= check_inode_item(root, &path, ext_ref);
5052         err &= ~LAST_ITEM;
5053         if (err && !ret)
5054                 ret = -EIO;
5055 out:
5056         btrfs_release_path(&path);
5057         return ret;
5058 }
5059
5060 /*
5061  * Iterate all item on the tree and call check_inode_item() to check.
5062  *
5063  * @root:       the root of the tree to be checked.
5064  * @ext_ref:    the EXTENDED_IREF feature
5065  *
5066  * Return 0 if no error found.
5067  * Return <0 for error.
5068  */
5069 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5070 {
5071         struct btrfs_path path;
5072         struct node_refs nrefs;
5073         struct btrfs_root_item *root_item = &root->root_item;
5074         int ret;
5075         int level;
5076         int err = 0;
5077
5078         /*
5079          * We need to manually check the first inode item(256)
5080          * As the following traversal function will only start from
5081          * the first inode item in the leaf, if inode item(256) is missing
5082          * we will just skip it forever.
5083          */
5084         ret = check_fs_first_inode(root, ext_ref);
5085         if (ret < 0)
5086                 return ret;
5087
5088         memset(&nrefs, 0, sizeof(nrefs));
5089         level = btrfs_header_level(root->node);
5090         btrfs_init_path(&path);
5091
5092         if (btrfs_root_refs(root_item) > 0 ||
5093             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5094                 path.nodes[level] = root->node;
5095                 path.slots[level] = 0;
5096                 extent_buffer_get(root->node);
5097         } else {
5098                 struct btrfs_key key;
5099
5100                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5101                 level = root_item->drop_level;
5102                 path.lowest_level = level;
5103                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5104                 if (ret < 0)
5105                         goto out;
5106                 ret = 0;
5107         }
5108
5109         while (1) {
5110                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5111                 err |= !!ret;
5112
5113                 /* if ret is negative, walk shall stop */
5114                 if (ret < 0) {
5115                         ret = err;
5116                         break;
5117                 }
5118
5119                 ret = walk_up_tree_v2(root, &path, &level);
5120                 if (ret != 0) {
5121                         /* Normal exit, reset ret to err */
5122                         ret = err;
5123                         break;
5124                 }
5125         }
5126
5127 out:
5128         btrfs_release_path(&path);
5129         return ret;
5130 }
5131
5132 /*
5133  * Find the relative ref for root_ref and root_backref.
5134  *
5135  * @root:       the root of the root tree.
5136  * @ref_key:    the key of the root ref.
5137  *
5138  * Return 0 if no error occurred.
5139  */
5140 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5141                           struct extent_buffer *node, int slot)
5142 {
5143         struct btrfs_path path;
5144         struct btrfs_key key;
5145         struct btrfs_root_ref *ref;
5146         struct btrfs_root_ref *backref;
5147         char ref_name[BTRFS_NAME_LEN] = {0};
5148         char backref_name[BTRFS_NAME_LEN] = {0};
5149         u64 ref_dirid;
5150         u64 ref_seq;
5151         u32 ref_namelen;
5152         u64 backref_dirid;
5153         u64 backref_seq;
5154         u32 backref_namelen;
5155         u32 len;
5156         int ret;
5157         int err = 0;
5158
5159         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5160         ref_dirid = btrfs_root_ref_dirid(node, ref);
5161         ref_seq = btrfs_root_ref_sequence(node, ref);
5162         ref_namelen = btrfs_root_ref_name_len(node, ref);
5163
5164         if (ref_namelen <= BTRFS_NAME_LEN) {
5165                 len = ref_namelen;
5166         } else {
5167                 len = BTRFS_NAME_LEN;
5168                 warning("%s[%llu %llu] ref_name too long",
5169                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5170                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5171                         ref_key->offset);
5172         }
5173         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5174
5175         /* Find relative root_ref */
5176         key.objectid = ref_key->offset;
5177         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5178         key.offset = ref_key->objectid;
5179
5180         btrfs_init_path(&path);
5181         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5182         if (ret) {
5183                 err |= ROOT_REF_MISSING;
5184                 error("%s[%llu %llu] couldn't find relative ref",
5185                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5186                       "ROOT_REF" : "ROOT_BACKREF",
5187                       ref_key->objectid, ref_key->offset);
5188                 goto out;
5189         }
5190
5191         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5192                                  struct btrfs_root_ref);
5193         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5194         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5195         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5196
5197         if (backref_namelen <= BTRFS_NAME_LEN) {
5198                 len = backref_namelen;
5199         } else {
5200                 len = BTRFS_NAME_LEN;
5201                 warning("%s[%llu %llu] ref_name too long",
5202                         key.type == BTRFS_ROOT_REF_KEY ?
5203                         "ROOT_REF" : "ROOT_BACKREF",
5204                         key.objectid, key.offset);
5205         }
5206         read_extent_buffer(path.nodes[0], backref_name,
5207                            (unsigned long)(backref + 1), len);
5208
5209         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5210             ref_namelen != backref_namelen ||
5211             strncmp(ref_name, backref_name, len)) {
5212                 err |= ROOT_REF_MISMATCH;
5213                 error("%s[%llu %llu] mismatch relative ref",
5214                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5215                       "ROOT_REF" : "ROOT_BACKREF",
5216                       ref_key->objectid, ref_key->offset);
5217         }
5218 out:
5219         btrfs_release_path(&path);
5220         return err;
5221 }
5222
5223 /*
5224  * Check all fs/file tree in low_memory mode.
5225  *
5226  * 1. for fs tree root item, call check_fs_root_v2()
5227  * 2. for fs tree root ref/backref, call check_root_ref()
5228  *
5229  * Return 0 if no error occurred.
5230  */
5231 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5232 {
5233         struct btrfs_root *tree_root = fs_info->tree_root;
5234         struct btrfs_root *cur_root = NULL;
5235         struct btrfs_path path;
5236         struct btrfs_key key;
5237         struct extent_buffer *node;
5238         unsigned int ext_ref;
5239         int slot;
5240         int ret;
5241         int err = 0;
5242
5243         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5244
5245         btrfs_init_path(&path);
5246         key.objectid = BTRFS_FS_TREE_OBJECTID;
5247         key.offset = 0;
5248         key.type = BTRFS_ROOT_ITEM_KEY;
5249
5250         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5251         if (ret < 0) {
5252                 err = ret;
5253                 goto out;
5254         } else if (ret > 0) {
5255                 err = -ENOENT;
5256                 goto out;
5257         }
5258
5259         while (1) {
5260                 node = path.nodes[0];
5261                 slot = path.slots[0];
5262                 btrfs_item_key_to_cpu(node, &key, slot);
5263                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5264                         goto out;
5265                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5266                     fs_root_objectid(key.objectid)) {
5267                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5268                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5269                                                                        &key);
5270                         } else {
5271                                 key.offset = (u64)-1;
5272                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5273                         }
5274
5275                         if (IS_ERR(cur_root)) {
5276                                 error("Fail to read fs/subvol tree: %lld",
5277                                       key.objectid);
5278                                 err = -EIO;
5279                                 goto next;
5280                         }
5281
5282                         ret = check_fs_root_v2(cur_root, ext_ref);
5283                         err |= ret;
5284
5285                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5286                                 btrfs_free_fs_root(cur_root);
5287                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5288                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5289                         ret = check_root_ref(tree_root, &key, node, slot);
5290                         err |= ret;
5291                 }
5292 next:
5293                 ret = btrfs_next_item(tree_root, &path);
5294                 if (ret > 0)
5295                         goto out;
5296                 if (ret < 0) {
5297                         err = ret;
5298                         goto out;
5299                 }
5300         }
5301
5302 out:
5303         btrfs_release_path(&path);
5304         return err;
5305 }
5306
5307 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5308 {
5309         struct list_head *cur = rec->backrefs.next;
5310         struct extent_backref *back;
5311         struct tree_backref *tback;
5312         struct data_backref *dback;
5313         u64 found = 0;
5314         int err = 0;
5315
5316         while(cur != &rec->backrefs) {
5317                 back = to_extent_backref(cur);
5318                 cur = cur->next;
5319                 if (!back->found_extent_tree) {
5320                         err = 1;
5321                         if (!print_errs)
5322                                 goto out;
5323                         if (back->is_data) {
5324                                 dback = to_data_backref(back);
5325                                 fprintf(stderr, "Backref %llu %s %llu"
5326                                         " owner %llu offset %llu num_refs %lu"
5327                                         " not found in extent tree\n",
5328                                         (unsigned long long)rec->start,
5329                                         back->full_backref ?
5330                                         "parent" : "root",
5331                                         back->full_backref ?
5332                                         (unsigned long long)dback->parent:
5333                                         (unsigned long long)dback->root,
5334                                         (unsigned long long)dback->owner,
5335                                         (unsigned long long)dback->offset,
5336                                         (unsigned long)dback->num_refs);
5337                         } else {
5338                                 tback = to_tree_backref(back);
5339                                 fprintf(stderr, "Backref %llu parent %llu"
5340                                         " root %llu not found in extent tree\n",
5341                                         (unsigned long long)rec->start,
5342                                         (unsigned long long)tback->parent,
5343                                         (unsigned long long)tback->root);
5344                         }
5345                 }
5346                 if (!back->is_data && !back->found_ref) {
5347                         err = 1;
5348                         if (!print_errs)
5349                                 goto out;
5350                         tback = to_tree_backref(back);
5351                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5352                                 (unsigned long long)rec->start,
5353                                 back->full_backref ? "parent" : "root",
5354                                 back->full_backref ?
5355                                 (unsigned long long)tback->parent :
5356                                 (unsigned long long)tback->root, back);
5357                 }
5358                 if (back->is_data) {
5359                         dback = to_data_backref(back);
5360                         if (dback->found_ref != dback->num_refs) {
5361                                 err = 1;
5362                                 if (!print_errs)
5363                                         goto out;
5364                                 fprintf(stderr, "Incorrect local backref count"
5365                                         " on %llu %s %llu owner %llu"
5366                                         " offset %llu found %u wanted %u back %p\n",
5367                                         (unsigned long long)rec->start,
5368                                         back->full_backref ?
5369                                         "parent" : "root",
5370                                         back->full_backref ?
5371                                         (unsigned long long)dback->parent:
5372                                         (unsigned long long)dback->root,
5373                                         (unsigned long long)dback->owner,
5374                                         (unsigned long long)dback->offset,
5375                                         dback->found_ref, dback->num_refs, back);
5376                         }
5377                         if (dback->disk_bytenr != rec->start) {
5378                                 err = 1;
5379                                 if (!print_errs)
5380                                         goto out;
5381                                 fprintf(stderr, "Backref disk bytenr does not"
5382                                         " match extent record, bytenr=%llu, "
5383                                         "ref bytenr=%llu\n",
5384                                         (unsigned long long)rec->start,
5385                                         (unsigned long long)dback->disk_bytenr);
5386                         }
5387
5388                         if (dback->bytes != rec->nr) {
5389                                 err = 1;
5390                                 if (!print_errs)
5391                                         goto out;
5392                                 fprintf(stderr, "Backref bytes do not match "
5393                                         "extent backref, bytenr=%llu, ref "
5394                                         "bytes=%llu, backref bytes=%llu\n",
5395                                         (unsigned long long)rec->start,
5396                                         (unsigned long long)rec->nr,
5397                                         (unsigned long long)dback->bytes);
5398                         }
5399                 }
5400                 if (!back->is_data) {
5401                         found += 1;
5402                 } else {
5403                         dback = to_data_backref(back);
5404                         found += dback->found_ref;
5405                 }
5406         }
5407         if (found != rec->refs) {
5408                 err = 1;
5409                 if (!print_errs)
5410                         goto out;
5411                 fprintf(stderr, "Incorrect global backref count "
5412                         "on %llu found %llu wanted %llu\n",
5413                         (unsigned long long)rec->start,
5414                         (unsigned long long)found,
5415                         (unsigned long long)rec->refs);
5416         }
5417 out:
5418         return err;
5419 }
5420
5421 static int free_all_extent_backrefs(struct extent_record *rec)
5422 {
5423         struct extent_backref *back;
5424         struct list_head *cur;
5425         while (!list_empty(&rec->backrefs)) {
5426                 cur = rec->backrefs.next;
5427                 back = to_extent_backref(cur);
5428                 list_del(cur);
5429                 free(back);
5430         }
5431         return 0;
5432 }
5433
5434 static void free_extent_record_cache(struct cache_tree *extent_cache)
5435 {
5436         struct cache_extent *cache;
5437         struct extent_record *rec;
5438
5439         while (1) {
5440                 cache = first_cache_extent(extent_cache);
5441                 if (!cache)
5442                         break;
5443                 rec = container_of(cache, struct extent_record, cache);
5444                 remove_cache_extent(extent_cache, cache);
5445                 free_all_extent_backrefs(rec);
5446                 free(rec);
5447         }
5448 }
5449
5450 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5451                                  struct extent_record *rec)
5452 {
5453         if (rec->content_checked && rec->owner_ref_checked &&
5454             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5455             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5456             !rec->bad_full_backref && !rec->crossing_stripes &&
5457             !rec->wrong_chunk_type) {
5458                 remove_cache_extent(extent_cache, &rec->cache);
5459                 free_all_extent_backrefs(rec);
5460                 list_del_init(&rec->list);
5461                 free(rec);
5462         }
5463         return 0;
5464 }
5465
5466 static int check_owner_ref(struct btrfs_root *root,
5467                             struct extent_record *rec,
5468                             struct extent_buffer *buf)
5469 {
5470         struct extent_backref *node;
5471         struct tree_backref *back;
5472         struct btrfs_root *ref_root;
5473         struct btrfs_key key;
5474         struct btrfs_path path;
5475         struct extent_buffer *parent;
5476         int level;
5477         int found = 0;
5478         int ret;
5479
5480         list_for_each_entry(node, &rec->backrefs, list) {
5481                 if (node->is_data)
5482                         continue;
5483                 if (!node->found_ref)
5484                         continue;
5485                 if (node->full_backref)
5486                         continue;
5487                 back = to_tree_backref(node);
5488                 if (btrfs_header_owner(buf) == back->root)
5489                         return 0;
5490         }
5491         BUG_ON(rec->is_root);
5492
5493         /* try to find the block by search corresponding fs tree */
5494         key.objectid = btrfs_header_owner(buf);
5495         key.type = BTRFS_ROOT_ITEM_KEY;
5496         key.offset = (u64)-1;
5497
5498         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5499         if (IS_ERR(ref_root))
5500                 return 1;
5501
5502         level = btrfs_header_level(buf);
5503         if (level == 0)
5504                 btrfs_item_key_to_cpu(buf, &key, 0);
5505         else
5506                 btrfs_node_key_to_cpu(buf, &key, 0);
5507
5508         btrfs_init_path(&path);
5509         path.lowest_level = level + 1;
5510         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5511         if (ret < 0)
5512                 return 0;
5513
5514         parent = path.nodes[level + 1];
5515         if (parent && buf->start == btrfs_node_blockptr(parent,
5516                                                         path.slots[level + 1]))
5517                 found = 1;
5518
5519         btrfs_release_path(&path);
5520         return found ? 0 : 1;
5521 }
5522
5523 static int is_extent_tree_record(struct extent_record *rec)
5524 {
5525         struct list_head *cur = rec->backrefs.next;
5526         struct extent_backref *node;
5527         struct tree_backref *back;
5528         int is_extent = 0;
5529
5530         while(cur != &rec->backrefs) {
5531                 node = to_extent_backref(cur);
5532                 cur = cur->next;
5533                 if (node->is_data)
5534                         return 0;
5535                 back = to_tree_backref(node);
5536                 if (node->full_backref)
5537                         return 0;
5538                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5539                         is_extent = 1;
5540         }
5541         return is_extent;
5542 }
5543
5544
5545 static int record_bad_block_io(struct btrfs_fs_info *info,
5546                                struct cache_tree *extent_cache,
5547                                u64 start, u64 len)
5548 {
5549         struct extent_record *rec;
5550         struct cache_extent *cache;
5551         struct btrfs_key key;
5552
5553         cache = lookup_cache_extent(extent_cache, start, len);
5554         if (!cache)
5555                 return 0;
5556
5557         rec = container_of(cache, struct extent_record, cache);
5558         if (!is_extent_tree_record(rec))
5559                 return 0;
5560
5561         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5562         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5563 }
5564
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two key pointers are swapped as a
 * whole; if @slot is 0 the new first key is propagated upwards through
 * btrfs_fixup_low_keys().
 *
 * For a leaf the item payloads are copied out, written back crosswise, and
 * the offset, size and key stored in the two item headers are exchanged.
 *
 * Returns 0 on success or -ENOMEM if a temporary payload buffer cannot be
 * allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crosswise. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        /* The first key of the node changed; fix the parents. */
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                /* Snapshot keys, offsets and sizes of both items first. */
                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each payload is written with the *other*
                 * item's length.  When the two sizes differ this reads past
                 * the end of the shorter temporary buffer, and combined with
                 * the offset/size swap below it is unclear which payload ends
                 * up attached to which key -- confirm the intended layout.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Exchange offset and size in the two item headers. */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /*
                 * Exchange the keys; the key setter works on path->slots[0],
                 * so point it at each slot in turn (it is left at slot + 1).
                 */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5632
5633 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5634 {
5635         struct extent_buffer *buf;
5636         struct btrfs_key k1, k2;
5637         int i;
5638         int level = path->lowest_level;
5639         int ret = -EIO;
5640
5641         buf = path->nodes[level];
5642         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5643                 if (level) {
5644                         btrfs_node_key_to_cpu(buf, &k1, i);
5645                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5646                 } else {
5647                         btrfs_item_key_to_cpu(buf, &k1, i);
5648                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5649                 }
5650                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5651                         continue;
5652                 ret = swap_values(root, path, buf, i);
5653                 if (ret)
5654                         break;
5655                 btrfs_mark_buffer_dirty(buf);
5656                 i = 0;
5657         }
5658         return ret;
5659 }
5660
5661 static int delete_bogus_item(struct btrfs_root *root,
5662                              struct btrfs_path *path,
5663                              struct extent_buffer *buf, int slot)
5664 {
5665         struct btrfs_key key;
5666         int nritems = btrfs_header_nritems(buf);
5667
5668         btrfs_item_key_to_cpu(buf, &key, slot);
5669
5670         /* These are all the keys we can deal with missing. */
5671         if (key.type != BTRFS_DIR_INDEX_KEY &&
5672             key.type != BTRFS_EXTENT_ITEM_KEY &&
5673             key.type != BTRFS_METADATA_ITEM_KEY &&
5674             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5675             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5676                 return -1;
5677
5678         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5679                (unsigned long long)key.objectid, key.type,
5680                (unsigned long long)key.offset, slot, buf->start);
5681         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5682                               btrfs_item_nr_offset(slot + 1),
5683                               sizeof(struct btrfs_item) *
5684                               (nritems - slot - 1));
5685         btrfs_set_header_nritems(buf, nritems - 1);
5686         if (slot == 0) {
5687                 struct btrfs_disk_key disk_key;
5688
5689                 btrfs_item_key(buf, &disk_key, 0);
5690                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5691         }
5692         btrfs_mark_buffer_dirty(buf);
5693         return 0;
5694 }
5695
5696 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5697 {
5698         struct extent_buffer *buf;
5699         int i;
5700         int ret = 0;
5701
5702         /* We should only get this for leaves */
5703         BUG_ON(path->lowest_level);
5704         buf = path->nodes[0];
5705 again:
5706         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5707                 unsigned int shift = 0, offset;
5708
5709                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5710                     BTRFS_LEAF_DATA_SIZE(root)) {
5711                         if (btrfs_item_end_nr(buf, i) >
5712                             BTRFS_LEAF_DATA_SIZE(root)) {
5713                                 ret = delete_bogus_item(root, path, buf, i);
5714                                 if (!ret)
5715                                         goto again;
5716                                 fprintf(stderr, "item is off the end of the "
5717                                         "leaf, can't fix\n");
5718                                 ret = -EIO;
5719                                 break;
5720                         }
5721                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5722                                 btrfs_item_end_nr(buf, i);
5723                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5724                            btrfs_item_offset_nr(buf, i - 1)) {
5725                         if (btrfs_item_end_nr(buf, i) >
5726                             btrfs_item_offset_nr(buf, i - 1)) {
5727                                 ret = delete_bogus_item(root, path, buf, i);
5728                                 if (!ret)
5729                                         goto again;
5730                                 fprintf(stderr, "items overlap, can't fix\n");
5731                                 ret = -EIO;
5732                                 break;
5733                         }
5734                         shift = btrfs_item_offset_nr(buf, i - 1) -
5735                                 btrfs_item_end_nr(buf, i);
5736                 }
5737                 if (!shift)
5738                         continue;
5739
5740                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5741                        i, shift, (unsigned long long)buf->start);
5742                 offset = btrfs_item_offset_nr(buf, i);
5743                 memmove_extent_buffer(buf,
5744                                       btrfs_leaf_data(buf) + offset + shift,
5745                                       btrfs_leaf_data(buf) + offset,
5746                                       btrfs_item_size_nr(buf, i));
5747                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5748                                       offset + shift);
5749                 btrfs_mark_buffer_dirty(buf);
5750         }
5751
5752         /*
5753          * We may have moved things, in which case we want to exit so we don't
5754          * write those changes out.  Once we have proper abort functionality in
5755          * progs this can be changed to something nicer.
5756          */
5757         BUG_ON(ret);
5758         return ret;
5759 }
5760
5761 /*
5762  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5763  * then just return -EIO.
5764  */
5765 static int try_to_fix_bad_block(struct btrfs_root *root,
5766                                 struct extent_buffer *buf,
5767                                 enum btrfs_tree_block_status status)
5768 {
5769         struct btrfs_trans_handle *trans;
5770         struct ulist *roots;
5771         struct ulist_node *node;
5772         struct btrfs_root *search_root;
5773         struct btrfs_path path;
5774         struct ulist_iterator iter;
5775         struct btrfs_key root_key, key;
5776         int ret;
5777
5778         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5779             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780                 return -EIO;
5781
5782         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5783         if (ret)
5784                 return -EIO;
5785
5786         btrfs_init_path(&path);
5787         ULIST_ITER_INIT(&iter);
5788         while ((node = ulist_next(roots, &iter))) {
5789                 root_key.objectid = node->val;
5790                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5791                 root_key.offset = (u64)-1;
5792
5793                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5794                 if (IS_ERR(root)) {
5795                         ret = -EIO;
5796                         break;
5797                 }
5798
5799
5800                 trans = btrfs_start_transaction(search_root, 0);
5801                 if (IS_ERR(trans)) {
5802                         ret = PTR_ERR(trans);
5803                         break;
5804                 }
5805
5806                 path.lowest_level = btrfs_header_level(buf);
5807                 path.skip_check_block = 1;
5808                 if (path.lowest_level)
5809                         btrfs_node_key_to_cpu(buf, &key, 0);
5810                 else
5811                         btrfs_item_key_to_cpu(buf, &key, 0);
5812                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5813                 if (ret) {
5814                         ret = -EIO;
5815                         btrfs_commit_transaction(trans, search_root);
5816                         break;
5817                 }
5818                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5819                         ret = fix_key_order(search_root, &path);
5820                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5821                         ret = fix_item_offset(search_root, &path);
5822                 if (ret) {
5823                         btrfs_commit_transaction(trans, search_root);
5824                         break;
5825                 }
5826                 btrfs_release_path(&path);
5827                 btrfs_commit_transaction(trans, search_root);
5828         }
5829         ulist_free(roots);
5830         btrfs_release_path(&path);
5831         return ret;
5832 }
5833
5834 static int check_block(struct btrfs_root *root,
5835                        struct cache_tree *extent_cache,
5836                        struct extent_buffer *buf, u64 flags)
5837 {
5838         struct extent_record *rec;
5839         struct cache_extent *cache;
5840         struct btrfs_key key;
5841         enum btrfs_tree_block_status status;
5842         int ret = 0;
5843         int level;
5844
5845         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5846         if (!cache)
5847                 return 1;
5848         rec = container_of(cache, struct extent_record, cache);
5849         rec->generation = btrfs_header_generation(buf);
5850
5851         level = btrfs_header_level(buf);
5852         if (btrfs_header_nritems(buf) > 0) {
5853
5854                 if (level == 0)
5855                         btrfs_item_key_to_cpu(buf, &key, 0);
5856                 else
5857                         btrfs_node_key_to_cpu(buf, &key, 0);
5858
5859                 rec->info_objectid = key.objectid;
5860         }
5861         rec->info_level = level;
5862
5863         if (btrfs_is_leaf(buf))
5864                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5865         else
5866                 status = btrfs_check_node(root, &rec->parent_key, buf);
5867
5868         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5869                 if (repair)
5870                         status = try_to_fix_bad_block(root, buf, status);
5871                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5872                         ret = -EIO;
5873                         fprintf(stderr, "bad block %llu\n",
5874                                 (unsigned long long)buf->start);
5875                 } else {
5876                         /*
5877                          * Signal to callers we need to start the scan over
5878                          * again since we'll have cowed blocks.
5879                          */
5880                         ret = -EAGAIN;
5881                 }
5882         } else {
5883                 rec->content_checked = 1;
5884                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5885                         rec->owner_ref_checked = 1;
5886                 else {
5887                         ret = check_owner_ref(root, rec, buf);
5888                         if (!ret)
5889                                 rec->owner_ref_checked = 1;
5890                 }
5891         }
5892         if (!ret)
5893                 maybe_free_extent_rec(extent_cache, rec);
5894         return ret;
5895 }
5896
5897 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5898                                                 u64 parent, u64 root)
5899 {
5900         struct list_head *cur = rec->backrefs.next;
5901         struct extent_backref *node;
5902         struct tree_backref *back;
5903
5904         while(cur != &rec->backrefs) {
5905                 node = to_extent_backref(cur);
5906                 cur = cur->next;
5907                 if (node->is_data)
5908                         continue;
5909                 back = to_tree_backref(node);
5910                 if (parent > 0) {
5911                         if (!node->full_backref)
5912                                 continue;
5913                         if (parent == back->parent)
5914                                 return back;
5915                 } else {
5916                         if (node->full_backref)
5917                                 continue;
5918                         if (back->root == root)
5919                                 return back;
5920                 }
5921         }
5922         return NULL;
5923 }
5924
5925 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5926                                                 u64 parent, u64 root)
5927 {
5928         struct tree_backref *ref = malloc(sizeof(*ref));
5929
5930         if (!ref)
5931                 return NULL;
5932         memset(&ref->node, 0, sizeof(ref->node));
5933         if (parent > 0) {
5934                 ref->parent = parent;
5935                 ref->node.full_backref = 1;
5936         } else {
5937                 ref->root = root;
5938                 ref->node.full_backref = 0;
5939         }
5940         list_add_tail(&ref->node.list, &rec->backrefs);
5941
5942         return ref;
5943 }
5944
5945 static struct data_backref *find_data_backref(struct extent_record *rec,
5946                                                 u64 parent, u64 root,
5947                                                 u64 owner, u64 offset,
5948                                                 int found_ref,
5949                                                 u64 disk_bytenr, u64 bytes)
5950 {
5951         struct list_head *cur = rec->backrefs.next;
5952         struct extent_backref *node;
5953         struct data_backref *back;
5954
5955         while(cur != &rec->backrefs) {
5956                 node = to_extent_backref(cur);
5957                 cur = cur->next;
5958                 if (!node->is_data)
5959                         continue;
5960                 back = to_data_backref(node);
5961                 if (parent > 0) {
5962                         if (!node->full_backref)
5963                                 continue;
5964                         if (parent == back->parent)
5965                                 return back;
5966                 } else {
5967                         if (node->full_backref)
5968                                 continue;
5969                         if (back->root == root && back->owner == owner &&
5970                             back->offset == offset) {
5971                                 if (found_ref && node->found_ref &&
5972                                     (back->bytes != bytes ||
5973                                     back->disk_bytenr != disk_bytenr))
5974                                         continue;
5975                                 return back;
5976                         }
5977                 }
5978         }
5979         return NULL;
5980 }
5981
5982 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5983                                                 u64 parent, u64 root,
5984                                                 u64 owner, u64 offset,
5985                                                 u64 max_size)
5986 {
5987         struct data_backref *ref = malloc(sizeof(*ref));
5988
5989         if (!ref)
5990                 return NULL;
5991         memset(&ref->node, 0, sizeof(ref->node));
5992         ref->node.is_data = 1;
5993
5994         if (parent > 0) {
5995                 ref->parent = parent;
5996                 ref->owner = 0;
5997                 ref->offset = 0;
5998                 ref->node.full_backref = 1;
5999         } else {
6000                 ref->root = root;
6001                 ref->owner = owner;
6002                 ref->offset = offset;
6003                 ref->node.full_backref = 0;
6004         }
6005         ref->bytes = max_size;
6006         ref->found_ref = 0;
6007         ref->num_refs = 0;
6008         list_add_tail(&ref->node.list, &rec->backrefs);
6009         if (max_size > rec->max_size)
6010                 rec->max_size = max_size;
6011         return ref;
6012 }
6013
6014 /* Check if the type of extent matches with its chunk */
6015 static void check_extent_type(struct extent_record *rec)
6016 {
6017         struct btrfs_block_group_cache *bg_cache;
6018
6019         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6020         if (!bg_cache)
6021                 return;
6022
6023         /* data extent, check chunk directly*/
6024         if (!rec->metadata) {
6025                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6026                         rec->wrong_chunk_type = 1;
6027                 return;
6028         }
6029
6030         /* metadata extent, check the obvious case first */
6031         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6032                                  BTRFS_BLOCK_GROUP_METADATA))) {
6033                 rec->wrong_chunk_type = 1;
6034                 return;
6035         }
6036
6037         /*
6038          * Check SYSTEM extent, as it's also marked as metadata, we can only
6039          * make sure it's a SYSTEM extent by its backref
6040          */
6041         if (!list_empty(&rec->backrefs)) {
6042                 struct extent_backref *node;
6043                 struct tree_backref *tback;
6044                 u64 bg_type;
6045
6046                 node = to_extent_backref(rec->backrefs.next);
6047                 if (node->is_data) {
6048                         /* tree block shouldn't have data backref */
6049                         rec->wrong_chunk_type = 1;
6050                         return;
6051                 }
6052                 tback = container_of(node, struct tree_backref, node);
6053
6054                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6055                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6056                 else
6057                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6058                 if (!(bg_cache->flags & bg_type))
6059                         rec->wrong_chunk_type = 1;
6060         }
6061 }
6062
6063 /*
6064  * Allocate a new extent record, fill default values from @tmpl and insert int
6065  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6066  * the cache, otherwise it fails.
6067  */
6068 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6069                 struct extent_record *tmpl)
6070 {
6071         struct extent_record *rec;
6072         int ret = 0;
6073
6074         BUG_ON(tmpl->max_size == 0);
6075         rec = malloc(sizeof(*rec));
6076         if (!rec)
6077                 return -ENOMEM;
6078         rec->start = tmpl->start;
6079         rec->max_size = tmpl->max_size;
6080         rec->nr = max(tmpl->nr, tmpl->max_size);
6081         rec->found_rec = tmpl->found_rec;
6082         rec->content_checked = tmpl->content_checked;
6083         rec->owner_ref_checked = tmpl->owner_ref_checked;
6084         rec->num_duplicates = 0;
6085         rec->metadata = tmpl->metadata;
6086         rec->flag_block_full_backref = FLAG_UNSET;
6087         rec->bad_full_backref = 0;
6088         rec->crossing_stripes = 0;
6089         rec->wrong_chunk_type = 0;
6090         rec->is_root = tmpl->is_root;
6091         rec->refs = tmpl->refs;
6092         rec->extent_item_refs = tmpl->extent_item_refs;
6093         rec->parent_generation = tmpl->parent_generation;
6094         INIT_LIST_HEAD(&rec->backrefs);
6095         INIT_LIST_HEAD(&rec->dups);
6096         INIT_LIST_HEAD(&rec->list);
6097         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6098         rec->cache.start = tmpl->start;
6099         rec->cache.size = tmpl->nr;
6100         ret = insert_cache_extent(extent_cache, &rec->cache);
6101         if (ret) {
6102                 free(rec);
6103                 return ret;
6104         }
6105         bytes_used += rec->nr;
6106
6107         if (tmpl->metadata)
6108                 rec->crossing_stripes = check_crossing_stripes(global_info,
6109                                 rec->start, global_info->nodesize);
6110         check_extent_type(rec);
6111         return ret;
6112 }
6113
6114 /*
6115  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6116  * some are hints:
6117  * - refs              - if found, increase refs
6118  * - is_root           - if found, set
6119  * - content_checked   - if found, set
6120  * - owner_ref_checked - if found, set
6121  *
6122  * If not found, create a new one, initialize and insert.
6123  */
6124 static int add_extent_rec(struct cache_tree *extent_cache,
6125                 struct extent_record *tmpl)
6126 {
6127         struct extent_record *rec;
6128         struct cache_extent *cache;
6129         int ret = 0;
6130         int dup = 0;
6131
6132         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6133         if (cache) {
6134                 rec = container_of(cache, struct extent_record, cache);
6135                 if (tmpl->refs)
6136                         rec->refs++;
6137                 if (rec->nr == 1)
6138                         rec->nr = max(tmpl->nr, tmpl->max_size);
6139
6140                 /*
6141                  * We need to make sure to reset nr to whatever the extent
6142                  * record says was the real size, this way we can compare it to
6143                  * the backrefs.
6144                  */
6145                 if (tmpl->found_rec) {
6146                         if (tmpl->start != rec->start || rec->found_rec) {
6147                                 struct extent_record *tmp;
6148
6149                                 dup = 1;
6150                                 if (list_empty(&rec->list))
6151                                         list_add_tail(&rec->list,
6152                                                       &duplicate_extents);
6153
6154                                 /*
6155                                  * We have to do this song and dance in case we
6156                                  * find an extent record that falls inside of
6157                                  * our current extent record but does not have
6158                                  * the same objectid.
6159                                  */
6160                                 tmp = malloc(sizeof(*tmp));
6161                                 if (!tmp)
6162                                         return -ENOMEM;
6163                                 tmp->start = tmpl->start;
6164                                 tmp->max_size = tmpl->max_size;
6165                                 tmp->nr = tmpl->nr;
6166                                 tmp->found_rec = 1;
6167                                 tmp->metadata = tmpl->metadata;
6168                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6169                                 INIT_LIST_HEAD(&tmp->list);
6170                                 list_add_tail(&tmp->list, &rec->dups);
6171                                 rec->num_duplicates++;
6172                         } else {
6173                                 rec->nr = tmpl->nr;
6174                                 rec->found_rec = 1;
6175                         }
6176                 }
6177
6178                 if (tmpl->extent_item_refs && !dup) {
6179                         if (rec->extent_item_refs) {
6180                                 fprintf(stderr, "block %llu rec "
6181                                         "extent_item_refs %llu, passed %llu\n",
6182                                         (unsigned long long)tmpl->start,
6183                                         (unsigned long long)
6184                                                         rec->extent_item_refs,
6185                                         (unsigned long long)tmpl->extent_item_refs);
6186                         }
6187                         rec->extent_item_refs = tmpl->extent_item_refs;
6188                 }
6189                 if (tmpl->is_root)
6190                         rec->is_root = 1;
6191                 if (tmpl->content_checked)
6192                         rec->content_checked = 1;
6193                 if (tmpl->owner_ref_checked)
6194                         rec->owner_ref_checked = 1;
6195                 memcpy(&rec->parent_key, &tmpl->parent_key,
6196                                 sizeof(tmpl->parent_key));
6197                 if (tmpl->parent_generation)
6198                         rec->parent_generation = tmpl->parent_generation;
6199                 if (rec->max_size < tmpl->max_size)
6200                         rec->max_size = tmpl->max_size;
6201
6202                 /*
6203                  * A metadata extent can't cross stripe_len boundary, otherwise
6204                  * kernel scrub won't be able to handle it.
6205                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6206                  * it.
6207                  */
6208                 if (tmpl->metadata)
6209                         rec->crossing_stripes = check_crossing_stripes(
6210                                         global_info, rec->start,
6211                                         global_info->nodesize);
6212                 check_extent_type(rec);
6213                 maybe_free_extent_rec(extent_cache, rec);
6214                 return ret;
6215         }
6216
6217         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6218
6219         return ret;
6220 }
6221
6222 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6223                             u64 parent, u64 root, int found_ref)
6224 {
6225         struct extent_record *rec;
6226         struct tree_backref *back;
6227         struct cache_extent *cache;
6228         int ret;
6229
6230         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6231         if (!cache) {
6232                 struct extent_record tmpl;
6233
6234                 memset(&tmpl, 0, sizeof(tmpl));
6235                 tmpl.start = bytenr;
6236                 tmpl.nr = 1;
6237                 tmpl.metadata = 1;
6238                 tmpl.max_size = 1;
6239
6240                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6241                 if (ret)
6242                         return ret;
6243
6244                 /* really a bug in cache_extent implement now */
6245                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6246                 if (!cache)
6247                         return -ENOENT;
6248         }
6249
6250         rec = container_of(cache, struct extent_record, cache);
6251         if (rec->start != bytenr) {
6252                 /*
6253                  * Several cause, from unaligned bytenr to over lapping extents
6254                  */
6255                 return -EEXIST;
6256         }
6257
6258         back = find_tree_backref(rec, parent, root);
6259         if (!back) {
6260                 back = alloc_tree_backref(rec, parent, root);
6261                 if (!back)
6262                         return -ENOMEM;
6263         }
6264
6265         if (found_ref) {
6266                 if (back->node.found_ref) {
6267                         fprintf(stderr, "Extent back ref already exists "
6268                                 "for %llu parent %llu root %llu \n",
6269                                 (unsigned long long)bytenr,
6270                                 (unsigned long long)parent,
6271                                 (unsigned long long)root);
6272                 }
6273                 back->node.found_ref = 1;
6274         } else {
6275                 if (back->node.found_extent_tree) {
6276                         fprintf(stderr, "Extent back ref already exists "
6277                                 "for %llu parent %llu root %llu \n",
6278                                 (unsigned long long)bytenr,
6279                                 (unsigned long long)parent,
6280                                 (unsigned long long)root);
6281                 }
6282                 back->node.found_extent_tree = 1;
6283         }
6284         check_extent_type(rec);
6285         maybe_free_extent_rec(extent_cache, rec);
6286         return 0;
6287 }
6288
6289 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6290                             u64 parent, u64 root, u64 owner, u64 offset,
6291                             u32 num_refs, int found_ref, u64 max_size)
6292 {
6293         struct extent_record *rec;
6294         struct data_backref *back;
6295         struct cache_extent *cache;
6296         int ret;
6297
6298         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6299         if (!cache) {
6300                 struct extent_record tmpl;
6301
6302                 memset(&tmpl, 0, sizeof(tmpl));
6303                 tmpl.start = bytenr;
6304                 tmpl.nr = 1;
6305                 tmpl.max_size = max_size;
6306
6307                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6308                 if (ret)
6309                         return ret;
6310
6311                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6312                 if (!cache)
6313                         abort();
6314         }
6315
6316         rec = container_of(cache, struct extent_record, cache);
6317         if (rec->max_size < max_size)
6318                 rec->max_size = max_size;
6319
6320         /*
6321          * If found_ref is set then max_size is the real size and must match the
6322          * existing refs.  So if we have already found a ref then we need to
6323          * make sure that this ref matches the existing one, otherwise we need
6324          * to add a new backref so we can notice that the backrefs don't match
6325          * and we need to figure out who is telling the truth.  This is to
6326          * account for that awful fsync bug I introduced where we'd end up with
6327          * a btrfs_file_extent_item that would have its length include multiple
6328          * prealloc extents or point inside of a prealloc extent.
6329          */
6330         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6331                                  bytenr, max_size);
6332         if (!back) {
6333                 back = alloc_data_backref(rec, parent, root, owner, offset,
6334                                           max_size);
6335                 BUG_ON(!back);
6336         }
6337
6338         if (found_ref) {
6339                 BUG_ON(num_refs != 1);
6340                 if (back->node.found_ref)
6341                         BUG_ON(back->bytes != max_size);
6342                 back->node.found_ref = 1;
6343                 back->found_ref += 1;
6344                 back->bytes = max_size;
6345                 back->disk_bytenr = bytenr;
6346                 rec->refs += 1;
6347                 rec->content_checked = 1;
6348                 rec->owner_ref_checked = 1;
6349         } else {
6350                 if (back->node.found_extent_tree) {
6351                         fprintf(stderr, "Extent back ref already exists "
6352                                 "for %llu parent %llu root %llu "
6353                                 "owner %llu offset %llu num_refs %lu\n",
6354                                 (unsigned long long)bytenr,
6355                                 (unsigned long long)parent,
6356                                 (unsigned long long)root,
6357                                 (unsigned long long)owner,
6358                                 (unsigned long long)offset,
6359                                 (unsigned long)num_refs);
6360                 }
6361                 back->num_refs = num_refs;
6362                 back->node.found_extent_tree = 1;
6363         }
6364         maybe_free_extent_rec(extent_cache, rec);
6365         return 0;
6366 }
6367
6368 static int add_pending(struct cache_tree *pending,
6369                        struct cache_tree *seen, u64 bytenr, u32 size)
6370 {
6371         int ret;
6372         ret = add_cache_extent(seen, bytenr, size);
6373         if (ret)
6374                 return ret;
6375         add_cache_extent(pending, bytenr, size);
6376         return 0;
6377 }
6378
6379 static int pick_next_pending(struct cache_tree *pending,
6380                         struct cache_tree *reada,
6381                         struct cache_tree *nodes,
6382                         u64 last, struct block_info *bits, int bits_nr,
6383                         int *reada_bits)
6384 {
6385         unsigned long node_start = last;
6386         struct cache_extent *cache;
6387         int ret;
6388
6389         cache = search_cache_extent(reada, 0);
6390         if (cache) {
6391                 bits[0].start = cache->start;
6392                 bits[0].size = cache->size;
6393                 *reada_bits = 1;
6394                 return 1;
6395         }
6396         *reada_bits = 0;
6397         if (node_start > 32768)
6398                 node_start -= 32768;
6399
6400         cache = search_cache_extent(nodes, node_start);
6401         if (!cache)
6402                 cache = search_cache_extent(nodes, 0);
6403
6404         if (!cache) {
6405                  cache = search_cache_extent(pending, 0);
6406                  if (!cache)
6407                          return 0;
6408                  ret = 0;
6409                  do {
6410                          bits[ret].start = cache->start;
6411                          bits[ret].size = cache->size;
6412                          cache = next_cache_extent(cache);
6413                          ret++;
6414                  } while (cache && ret < bits_nr);
6415                  return ret;
6416         }
6417
6418         ret = 0;
6419         do {
6420                 bits[ret].start = cache->start;
6421                 bits[ret].size = cache->size;
6422                 cache = next_cache_extent(cache);
6423                 ret++;
6424         } while (cache && ret < bits_nr);
6425
6426         if (bits_nr - ret > 8) {
6427                 u64 lookup = bits[0].start + bits[0].size;
6428                 struct cache_extent *next;
6429                 next = search_cache_extent(pending, lookup);
6430                 while(next) {
6431                         if (next->start - lookup > 32768)
6432                                 break;
6433                         bits[ret].start = next->start;
6434                         bits[ret].size = next->size;
6435                         lookup = next->start + next->size;
6436                         ret++;
6437                         if (ret == bits_nr)
6438                                 break;
6439                         next = next_cache_extent(next);
6440                         if (!next)
6441                                 break;
6442                 }
6443         }
6444         return ret;
6445 }
6446
6447 static void free_chunk_record(struct cache_extent *cache)
6448 {
6449         struct chunk_record *rec;
6450
6451         rec = container_of(cache, struct chunk_record, cache);
6452         list_del_init(&rec->list);
6453         list_del_init(&rec->dextents);
6454         free(rec);
6455 }
6456
6457 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6458 {
6459         cache_tree_free_extents(chunk_cache, free_chunk_record);
6460 }
6461
6462 static void free_device_record(struct rb_node *node)
6463 {
6464         struct device_record *rec;
6465
6466         rec = container_of(node, struct device_record, node);
6467         free(rec);
6468 }
6469
6470 FREE_RB_BASED_TREE(device_cache, free_device_record);
6471
6472 int insert_block_group_record(struct block_group_tree *tree,
6473                               struct block_group_record *bg_rec)
6474 {
6475         int ret;
6476
6477         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6478         if (ret)
6479                 return ret;
6480
6481         list_add_tail(&bg_rec->list, &tree->block_groups);
6482         return 0;
6483 }
6484
6485 static void free_block_group_record(struct cache_extent *cache)
6486 {
6487         struct block_group_record *rec;
6488
6489         rec = container_of(cache, struct block_group_record, cache);
6490         list_del_init(&rec->list);
6491         free(rec);
6492 }
6493
6494 void free_block_group_tree(struct block_group_tree *tree)
6495 {
6496         cache_tree_free_extents(&tree->tree, free_block_group_record);
6497 }
6498
6499 int insert_device_extent_record(struct device_extent_tree *tree,
6500                                 struct device_extent_record *de_rec)
6501 {
6502         int ret;
6503
6504         /*
6505          * Device extent is a bit different from the other extents, because
6506          * the extents which belong to the different devices may have the
6507          * same start and size, so we need use the special extent cache
6508          * search/insert functions.
6509          */
6510         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6511         if (ret)
6512                 return ret;
6513
6514         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6515         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6516         return 0;
6517 }
6518
6519 static void free_device_extent_record(struct cache_extent *cache)
6520 {
6521         struct device_extent_record *rec;
6522
6523         rec = container_of(cache, struct device_extent_record, cache);
6524         if (!list_empty(&rec->chunk_list))
6525                 list_del_init(&rec->chunk_list);
6526         if (!list_empty(&rec->device_list))
6527                 list_del_init(&rec->device_list);
6528         free(rec);
6529 }
6530
6531 void free_device_extent_tree(struct device_extent_tree *tree)
6532 {
6533         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6534 }
6535
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.  Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are
 * recorded as tree backrefs, everything else as data backrefs; key.objectid is
 * the extent bytenr and key.offset the parent.
 *
 * NOTE(review): the data backref is added with max_size == 0, which would trip
 * the BUG_ON() in add_extent_rec_nolookup() if no record exists yet -- confirm
 * callers always see the extent item first.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
                return add_tree_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0);

        return add_data_backref(extent_cache, key.objectid, key.offset,
                        0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6556
6557 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6558                                             struct btrfs_key *key,
6559                                             int slot)
6560 {
6561         struct btrfs_chunk *ptr;
6562         struct chunk_record *rec;
6563         int num_stripes, i;
6564
6565         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6566         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6567
6568         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6569         if (!rec) {
6570                 fprintf(stderr, "memory allocation failed\n");
6571                 exit(-1);
6572         }
6573
6574         INIT_LIST_HEAD(&rec->list);
6575         INIT_LIST_HEAD(&rec->dextents);
6576         rec->bg_rec = NULL;
6577
6578         rec->cache.start = key->offset;
6579         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6580
6581         rec->generation = btrfs_header_generation(leaf);
6582
6583         rec->objectid = key->objectid;
6584         rec->type = key->type;
6585         rec->offset = key->offset;
6586
6587         rec->length = rec->cache.size;
6588         rec->owner = btrfs_chunk_owner(leaf, ptr);
6589         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6590         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6591         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6592         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6593         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6594         rec->num_stripes = num_stripes;
6595         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6596
6597         for (i = 0; i < rec->num_stripes; ++i) {
6598                 rec->stripes[i].devid =
6599                         btrfs_stripe_devid_nr(leaf, ptr, i);
6600                 rec->stripes[i].offset =
6601                         btrfs_stripe_offset_nr(leaf, ptr, i);
6602                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6603                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6604                                 BTRFS_UUID_SIZE);
6605         }
6606
6607         return rec;
6608 }
6609
6610 static int process_chunk_item(struct cache_tree *chunk_cache,
6611                               struct btrfs_key *key, struct extent_buffer *eb,
6612                               int slot)
6613 {
6614         struct chunk_record *rec;
6615         struct btrfs_chunk *chunk;
6616         int ret = 0;
6617
6618         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6619         /*
6620          * Do extra check for this chunk item,
6621          *
6622          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6623          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6624          * and owner<->key_type check.
6625          */
6626         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6627                                       key->offset);
6628         if (ret < 0) {
6629                 error("chunk(%llu, %llu) is not valid, ignore it",
6630                       key->offset, btrfs_chunk_length(eb, chunk));
6631                 return 0;
6632         }
6633         rec = btrfs_new_chunk_record(eb, key, slot);
6634         ret = insert_cache_extent(chunk_cache, &rec->cache);
6635         if (ret) {
6636                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6637                         rec->offset, rec->length);
6638                 free(rec);
6639         }
6640
6641         return ret;
6642 }
6643
6644 static int process_device_item(struct rb_root *dev_cache,
6645                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6646 {
6647         struct btrfs_dev_item *ptr;
6648         struct device_record *rec;
6649         int ret = 0;
6650
6651         ptr = btrfs_item_ptr(eb,
6652                 slot, struct btrfs_dev_item);
6653
6654         rec = malloc(sizeof(*rec));
6655         if (!rec) {
6656                 fprintf(stderr, "memory allocation failed\n");
6657                 return -ENOMEM;
6658         }
6659
6660         rec->devid = key->offset;
6661         rec->generation = btrfs_header_generation(eb);
6662
6663         rec->objectid = key->objectid;
6664         rec->type = key->type;
6665         rec->offset = key->offset;
6666
6667         rec->devid = btrfs_device_id(eb, ptr);
6668         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6669         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6670
6671         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6672         if (ret) {
6673                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6674                 free(rec);
6675         }
6676
6677         return ret;
6678 }
6679
6680 struct block_group_record *
6681 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6682                              int slot)
6683 {
6684         struct btrfs_block_group_item *ptr;
6685         struct block_group_record *rec;
6686
6687         rec = calloc(1, sizeof(*rec));
6688         if (!rec) {
6689                 fprintf(stderr, "memory allocation failed\n");
6690                 exit(-1);
6691         }
6692
6693         rec->cache.start = key->objectid;
6694         rec->cache.size = key->offset;
6695
6696         rec->generation = btrfs_header_generation(leaf);
6697
6698         rec->objectid = key->objectid;
6699         rec->type = key->type;
6700         rec->offset = key->offset;
6701
6702         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6703         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6704
6705         INIT_LIST_HEAD(&rec->list);
6706
6707         return rec;
6708 }
6709
6710 static int process_block_group_item(struct block_group_tree *block_group_cache,
6711                                     struct btrfs_key *key,
6712                                     struct extent_buffer *eb, int slot)
6713 {
6714         struct block_group_record *rec;
6715         int ret = 0;
6716
6717         rec = btrfs_new_block_group_record(eb, key, slot);
6718         ret = insert_block_group_record(block_group_cache, rec);
6719         if (ret) {
6720                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6721                         rec->objectid, rec->offset);
6722                 free(rec);
6723         }
6724
6725         return ret;
6726 }
6727
6728 struct device_extent_record *
6729 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6730                                struct btrfs_key *key, int slot)
6731 {
6732         struct device_extent_record *rec;
6733         struct btrfs_dev_extent *ptr;
6734
6735         rec = calloc(1, sizeof(*rec));
6736         if (!rec) {
6737                 fprintf(stderr, "memory allocation failed\n");
6738                 exit(-1);
6739         }
6740
6741         rec->cache.objectid = key->objectid;
6742         rec->cache.start = key->offset;
6743
6744         rec->generation = btrfs_header_generation(leaf);
6745
6746         rec->objectid = key->objectid;
6747         rec->type = key->type;
6748         rec->offset = key->offset;
6749
6750         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6751         rec->chunk_objecteid =
6752                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6753         rec->chunk_offset =
6754                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6755         rec->length = btrfs_dev_extent_length(leaf, ptr);
6756         rec->cache.size = rec->length;
6757
6758         INIT_LIST_HEAD(&rec->chunk_list);
6759         INIT_LIST_HEAD(&rec->device_list);
6760
6761         return rec;
6762 }
6763
6764 static int
6765 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6766                            struct btrfs_key *key, struct extent_buffer *eb,
6767                            int slot)
6768 {
6769         struct device_extent_record *rec;
6770         int ret;
6771
6772         rec = btrfs_new_device_extent_record(eb, key, slot);
6773         ret = insert_device_extent_record(dev_extent_cache, rec);
6774         if (ret) {
6775                 fprintf(stderr,
6776                         "Device extent[%llu, %llu, %llu] existed.\n",
6777                         rec->objectid, rec->offset, rec->length);
6778                 free(rec);
6779         }
6780
6781         return ret;
6782 }
6783
6784 static int process_extent_item(struct btrfs_root *root,
6785                                struct cache_tree *extent_cache,
6786                                struct extent_buffer *eb, int slot)
6787 {
6788         struct btrfs_extent_item *ei;
6789         struct btrfs_extent_inline_ref *iref;
6790         struct btrfs_extent_data_ref *dref;
6791         struct btrfs_shared_data_ref *sref;
6792         struct btrfs_key key;
6793         struct extent_record tmpl;
6794         unsigned long end;
6795         unsigned long ptr;
6796         int ret;
6797         int type;
6798         u32 item_size = btrfs_item_size_nr(eb, slot);
6799         u64 refs = 0;
6800         u64 offset;
6801         u64 num_bytes;
6802         int metadata = 0;
6803
6804         btrfs_item_key_to_cpu(eb, &key, slot);
6805
6806         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6807                 metadata = 1;
6808                 num_bytes = root->fs_info->nodesize;
6809         } else {
6810                 num_bytes = key.offset;
6811         }
6812
6813         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6814                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6815                       key.objectid, root->fs_info->sectorsize);
6816                 return -EIO;
6817         }
6818         if (item_size < sizeof(*ei)) {
6819 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6820                 struct btrfs_extent_item_v0 *ei0;
6821                 BUG_ON(item_size != sizeof(*ei0));
6822                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6823                 refs = btrfs_extent_refs_v0(eb, ei0);
6824 #else
6825                 BUG();
6826 #endif
6827                 memset(&tmpl, 0, sizeof(tmpl));
6828                 tmpl.start = key.objectid;
6829                 tmpl.nr = num_bytes;
6830                 tmpl.extent_item_refs = refs;
6831                 tmpl.metadata = metadata;
6832                 tmpl.found_rec = 1;
6833                 tmpl.max_size = num_bytes;
6834
6835                 return add_extent_rec(extent_cache, &tmpl);
6836         }
6837
6838         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6839         refs = btrfs_extent_refs(eb, ei);
6840         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6841                 metadata = 1;
6842         else
6843                 metadata = 0;
6844         if (metadata && num_bytes != root->fs_info->nodesize) {
6845                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6846                       num_bytes, root->fs_info->nodesize);
6847                 return -EIO;
6848         }
6849         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6850                 error("ignore invalid data extent, length %llu is not aligned to %u",
6851                       num_bytes, root->fs_info->sectorsize);
6852                 return -EIO;
6853         }
6854
6855         memset(&tmpl, 0, sizeof(tmpl));
6856         tmpl.start = key.objectid;
6857         tmpl.nr = num_bytes;
6858         tmpl.extent_item_refs = refs;
6859         tmpl.metadata = metadata;
6860         tmpl.found_rec = 1;
6861         tmpl.max_size = num_bytes;
6862         add_extent_rec(extent_cache, &tmpl);
6863
6864         ptr = (unsigned long)(ei + 1);
6865         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6866             key.type == BTRFS_EXTENT_ITEM_KEY)
6867                 ptr += sizeof(struct btrfs_tree_block_info);
6868
6869         end = (unsigned long)ei + item_size;
6870         while (ptr < end) {
6871                 iref = (struct btrfs_extent_inline_ref *)ptr;
6872                 type = btrfs_extent_inline_ref_type(eb, iref);
6873                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6874                 switch (type) {
6875                 case BTRFS_TREE_BLOCK_REF_KEY:
6876                         ret = add_tree_backref(extent_cache, key.objectid,
6877                                         0, offset, 0);
6878                         if (ret < 0)
6879                                 error(
6880                         "add_tree_backref failed (extent items tree block): %s",
6881                                       strerror(-ret));
6882                         break;
6883                 case BTRFS_SHARED_BLOCK_REF_KEY:
6884                         ret = add_tree_backref(extent_cache, key.objectid,
6885                                         offset, 0, 0);
6886                         if (ret < 0)
6887                                 error(
6888                         "add_tree_backref failed (extent items shared block): %s",
6889                                       strerror(-ret));
6890                         break;
6891                 case BTRFS_EXTENT_DATA_REF_KEY:
6892                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6893                         add_data_backref(extent_cache, key.objectid, 0,
6894                                         btrfs_extent_data_ref_root(eb, dref),
6895                                         btrfs_extent_data_ref_objectid(eb,
6896                                                                        dref),
6897                                         btrfs_extent_data_ref_offset(eb, dref),
6898                                         btrfs_extent_data_ref_count(eb, dref),
6899                                         0, num_bytes);
6900                         break;
6901                 case BTRFS_SHARED_DATA_REF_KEY:
6902                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6903                         add_data_backref(extent_cache, key.objectid, offset,
6904                                         0, 0, 0,
6905                                         btrfs_shared_data_ref_count(eb, sref),
6906                                         0, num_bytes);
6907                         break;
6908                 default:
6909                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6910                                 key.objectid, key.type, num_bytes);
6911                         goto out;
6912                 }
6913                 ptr += btrfs_extent_inline_ref_size(type);
6914         }
6915         WARN_ON(ptr > end);
6916 out:
6917         return 0;
6918 }
6919
6920 static int check_cache_range(struct btrfs_root *root,
6921                              struct btrfs_block_group_cache *cache,
6922                              u64 offset, u64 bytes)
6923 {
6924         struct btrfs_free_space *entry;
6925         u64 *logical;
6926         u64 bytenr;
6927         int stripe_len;
6928         int i, nr, ret;
6929
6930         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6931                 bytenr = btrfs_sb_offset(i);
6932                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6933                                        cache->key.objectid, bytenr, 0,
6934                                        &logical, &nr, &stripe_len);
6935                 if (ret)
6936                         return ret;
6937
6938                 while (nr--) {
6939                         if (logical[nr] + stripe_len <= offset)
6940                                 continue;
6941                         if (offset + bytes <= logical[nr])
6942                                 continue;
6943                         if (logical[nr] == offset) {
6944                                 if (stripe_len >= bytes) {
6945                                         free(logical);
6946                                         return 0;
6947                                 }
6948                                 bytes -= stripe_len;
6949                                 offset += stripe_len;
6950                         } else if (logical[nr] < offset) {
6951                                 if (logical[nr] + stripe_len >=
6952                                     offset + bytes) {
6953                                         free(logical);
6954                                         return 0;
6955                                 }
6956                                 bytes = (offset + bytes) -
6957                                         (logical[nr] + stripe_len);
6958                                 offset = logical[nr] + stripe_len;
6959                         } else {
6960                                 /*
6961                                  * Could be tricky, the super may land in the
6962                                  * middle of the area we're checking.  First
6963                                  * check the easiest case, it's at the end.
6964                                  */
6965                                 if (logical[nr] + stripe_len >=
6966                                     bytes + offset) {
6967                                         bytes = logical[nr] - offset;
6968                                         continue;
6969                                 }
6970
6971                                 /* Check the left side */
6972                                 ret = check_cache_range(root, cache,
6973                                                         offset,
6974                                                         logical[nr] - offset);
6975                                 if (ret) {
6976                                         free(logical);
6977                                         return ret;
6978                                 }
6979
6980                                 /* Now we continue with the right side */
6981                                 bytes = (offset + bytes) -
6982                                         (logical[nr] + stripe_len);
6983                                 offset = logical[nr] + stripe_len;
6984                         }
6985                 }
6986
6987                 free(logical);
6988         }
6989
6990         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6991         if (!entry) {
6992                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6993                         offset, offset+bytes);
6994                 return -EINVAL;
6995         }
6996
6997         if (entry->offset != offset) {
6998                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6999                         entry->offset);
7000                 return -EINVAL;
7001         }
7002
7003         if (entry->bytes != bytes) {
7004                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7005                         bytes, entry->bytes, offset);
7006                 return -EINVAL;
7007         }
7008
7009         unlink_free_space(cache->free_space_ctl, entry);
7010         free(entry);
7011         return 0;
7012 }
7013
7014 static int verify_space_cache(struct btrfs_root *root,
7015                               struct btrfs_block_group_cache *cache)
7016 {
7017         struct btrfs_path path;
7018         struct extent_buffer *leaf;
7019         struct btrfs_key key;
7020         u64 last;
7021         int ret = 0;
7022
7023         root = root->fs_info->extent_root;
7024
7025         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7026
7027         btrfs_init_path(&path);
7028         key.objectid = last;
7029         key.offset = 0;
7030         key.type = BTRFS_EXTENT_ITEM_KEY;
7031         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7032         if (ret < 0)
7033                 goto out;
7034         ret = 0;
7035         while (1) {
7036                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7037                         ret = btrfs_next_leaf(root, &path);
7038                         if (ret < 0)
7039                                 goto out;
7040                         if (ret > 0) {
7041                                 ret = 0;
7042                                 break;
7043                         }
7044                 }
7045                 leaf = path.nodes[0];
7046                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7047                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7048                         break;
7049                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7050                     key.type != BTRFS_METADATA_ITEM_KEY) {
7051                         path.slots[0]++;
7052                         continue;
7053                 }
7054
7055                 if (last == key.objectid) {
7056                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7057                                 last = key.objectid + key.offset;
7058                         else
7059                                 last = key.objectid + root->fs_info->nodesize;
7060                         path.slots[0]++;
7061                         continue;
7062                 }
7063
7064                 ret = check_cache_range(root, cache, last,
7065                                         key.objectid - last);
7066                 if (ret)
7067                         break;
7068                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7069                         last = key.objectid + key.offset;
7070                 else
7071                         last = key.objectid + root->fs_info->nodesize;
7072                 path.slots[0]++;
7073         }
7074
7075         if (last < cache->key.objectid + cache->key.offset)
7076                 ret = check_cache_range(root, cache, last,
7077                                         cache->key.objectid +
7078                                         cache->key.offset - last);
7079
7080 out:
7081         btrfs_release_path(&path);
7082
7083         if (!ret &&
7084             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7085                 fprintf(stderr, "There are still entries left in the space "
7086                         "cache\n");
7087                 ret = -EINVAL;
7088         }
7089
7090         return ret;
7091 }
7092
7093 static int check_space_cache(struct btrfs_root *root)
7094 {
7095         struct btrfs_block_group_cache *cache;
7096         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7097         int ret;
7098         int error = 0;
7099
7100         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7101             btrfs_super_generation(root->fs_info->super_copy) !=
7102             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7103                 printf("cache and super generation don't match, space cache "
7104                        "will be invalidated\n");
7105                 return 0;
7106         }
7107
7108         if (ctx.progress_enabled) {
7109                 ctx.tp = TASK_FREE_SPACE;
7110                 task_start(ctx.info);
7111         }
7112
7113         while (1) {
7114                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7115                 if (!cache)
7116                         break;
7117
7118                 start = cache->key.objectid + cache->key.offset;
7119                 if (!cache->free_space_ctl) {
7120                         if (btrfs_init_free_space_ctl(cache,
7121                                                 root->fs_info->sectorsize)) {
7122                                 ret = -ENOMEM;
7123                                 break;
7124                         }
7125                 } else {
7126                         btrfs_remove_free_space_cache(cache);
7127                 }
7128
7129                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7130                         ret = exclude_super_stripes(root, cache);
7131                         if (ret) {
7132                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7133                                         strerror(-ret));
7134                                 error++;
7135                                 continue;
7136                         }
7137                         ret = load_free_space_tree(root->fs_info, cache);
7138                         free_excluded_extents(root, cache);
7139                         if (ret < 0) {
7140                                 fprintf(stderr, "could not load free space tree: %s\n",
7141                                         strerror(-ret));
7142                                 error++;
7143                                 continue;
7144                         }
7145                         error += ret;
7146                 } else {
7147                         ret = load_free_space_cache(root->fs_info, cache);
7148                         if (!ret)
7149                                 continue;
7150                 }
7151
7152                 ret = verify_space_cache(root, cache);
7153                 if (ret) {
7154                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7155                                 cache->key.objectid);
7156                         error++;
7157                 }
7158         }
7159
7160         task_stop(ctx.info);
7161
7162         return error ? -EINVAL : 0;
7163 }
7164
7165 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7166                         u64 num_bytes, unsigned long leaf_offset,
7167                         struct extent_buffer *eb) {
7168
7169         u64 offset = 0;
7170         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7171         char *data;
7172         unsigned long csum_offset;
7173         u32 csum;
7174         u32 csum_expected;
7175         u64 read_len;
7176         u64 data_checked = 0;
7177         u64 tmp;
7178         int ret = 0;
7179         int mirror;
7180         int num_copies;
7181
7182         if (num_bytes % root->fs_info->sectorsize)
7183                 return -EINVAL;
7184
7185         data = malloc(num_bytes);
7186         if (!data)
7187                 return -ENOMEM;
7188
7189         while (offset < num_bytes) {
7190                 mirror = 0;
7191 again:
7192                 read_len = num_bytes - offset;
7193                 /* read as much space once a time */
7194                 ret = read_extent_data(root, data + offset,
7195                                 bytenr + offset, &read_len, mirror);
7196                 if (ret)
7197                         goto out;
7198                 data_checked = 0;
7199                 /* verify every 4k data's checksum */
7200                 while (data_checked < read_len) {
7201                         csum = ~(u32)0;
7202                         tmp = offset + data_checked;
7203
7204                         csum = btrfs_csum_data((char *)data + tmp,
7205                                                csum, root->fs_info->sectorsize);
7206                         btrfs_csum_final(csum, (u8 *)&csum);
7207
7208                         csum_offset = leaf_offset +
7209                                  tmp / root->fs_info->sectorsize * csum_size;
7210                         read_extent_buffer(eb, (char *)&csum_expected,
7211                                            csum_offset, csum_size);
7212                         /* try another mirror */
7213                         if (csum != csum_expected) {
7214                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7215                                                 mirror, bytenr + tmp,
7216                                                 csum, csum_expected);
7217                                 num_copies = btrfs_num_copies(
7218                                                 &root->fs_info->mapping_tree,
7219                                                 bytenr, num_bytes);
7220                                 if (mirror < num_copies - 1) {
7221                                         mirror += 1;
7222                                         goto again;
7223                                 }
7224                         }
7225                         data_checked += root->fs_info->sectorsize;
7226                 }
7227                 offset += read_len;
7228         }
7229 out:
7230         free(data);
7231         return ret;
7232 }
7233
7234 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7235                                u64 num_bytes)
7236 {
7237         struct btrfs_path path;
7238         struct extent_buffer *leaf;
7239         struct btrfs_key key;
7240         int ret;
7241
7242         btrfs_init_path(&path);
7243         key.objectid = bytenr;
7244         key.type = BTRFS_EXTENT_ITEM_KEY;
7245         key.offset = (u64)-1;
7246
7247 again:
7248         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7249                                 0, 0);
7250         if (ret < 0) {
7251                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7252                 btrfs_release_path(&path);
7253                 return ret;
7254         } else if (ret) {
7255                 if (path.slots[0] > 0) {
7256                         path.slots[0]--;
7257                 } else {
7258                         ret = btrfs_prev_leaf(root, &path);
7259                         if (ret < 0) {
7260                                 goto out;
7261                         } else if (ret > 0) {
7262                                 ret = 0;
7263                                 goto out;
7264                         }
7265                 }
7266         }
7267
7268         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7269
7270         /*
7271          * Block group items come before extent items if they have the same
7272          * bytenr, so walk back one more just in case.  Dear future traveller,
7273          * first congrats on mastering time travel.  Now if it's not too much
7274          * trouble could you go back to 2006 and tell Chris to make the
7275          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7276          * EXTENT_ITEM_KEY please?
7277          */
7278         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7279                 if (path.slots[0] > 0) {
7280                         path.slots[0]--;
7281                 } else {
7282                         ret = btrfs_prev_leaf(root, &path);
7283                         if (ret < 0) {
7284                                 goto out;
7285                         } else if (ret > 0) {
7286                                 ret = 0;
7287                                 goto out;
7288                         }
7289                 }
7290                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7291         }
7292
7293         while (num_bytes) {
7294                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7295                         ret = btrfs_next_leaf(root, &path);
7296                         if (ret < 0) {
7297                                 fprintf(stderr, "Error going to next leaf "
7298                                         "%d\n", ret);
7299                                 btrfs_release_path(&path);
7300                                 return ret;
7301                         } else if (ret) {
7302                                 break;
7303                         }
7304                 }
7305                 leaf = path.nodes[0];
7306                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7307                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7308                         path.slots[0]++;
7309                         continue;
7310                 }
7311                 if (key.objectid + key.offset < bytenr) {
7312                         path.slots[0]++;
7313                         continue;
7314                 }
7315                 if (key.objectid > bytenr + num_bytes)
7316                         break;
7317
7318                 if (key.objectid == bytenr) {
7319                         if (key.offset >= num_bytes) {
7320                                 num_bytes = 0;
7321                                 break;
7322                         }
7323                         num_bytes -= key.offset;
7324                         bytenr += key.offset;
7325                 } else if (key.objectid < bytenr) {
7326                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7327                                 num_bytes = 0;
7328                                 break;
7329                         }
7330                         num_bytes = (bytenr + num_bytes) -
7331                                 (key.objectid + key.offset);
7332                         bytenr = key.objectid + key.offset;
7333                 } else {
7334                         if (key.objectid + key.offset < bytenr + num_bytes) {
7335                                 u64 new_start = key.objectid + key.offset;
7336                                 u64 new_bytes = bytenr + num_bytes - new_start;
7337
7338                                 /*
7339                                  * Weird case, the extent is in the middle of
7340                                  * our range, we'll have to search one side
7341                                  * and then the other.  Not sure if this happens
7342                                  * in real life, but no harm in coding it up
7343                                  * anyway just in case.
7344                                  */
7345                                 btrfs_release_path(&path);
7346                                 ret = check_extent_exists(root, new_start,
7347                                                           new_bytes);
7348                                 if (ret) {
7349                                         fprintf(stderr, "Right section didn't "
7350                                                 "have a record\n");
7351                                         break;
7352                                 }
7353                                 num_bytes = key.objectid - bytenr;
7354                                 goto again;
7355                         }
7356                         num_bytes = key.objectid - bytenr;
7357                 }
7358                 path.slots[0]++;
7359         }
7360         ret = 0;
7361
7362 out:
7363         if (num_bytes && !ret) {
7364                 fprintf(stderr, "There are no extents for csum range "
7365                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7366                 ret = 1;
7367         }
7368
7369         btrfs_release_path(&path);
7370         return ret;
7371 }
7372
7373 static int check_csums(struct btrfs_root *root)
7374 {
7375         struct btrfs_path path;
7376         struct extent_buffer *leaf;
7377         struct btrfs_key key;
7378         u64 offset = 0, num_bytes = 0;
7379         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7380         int errors = 0;
7381         int ret;
7382         u64 data_len;
7383         unsigned long leaf_offset;
7384
7385         root = root->fs_info->csum_root;
7386         if (!extent_buffer_uptodate(root->node)) {
7387                 fprintf(stderr, "No valid csum tree found\n");
7388                 return -ENOENT;
7389         }
7390
7391         btrfs_init_path(&path);
7392         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7393         key.type = BTRFS_EXTENT_CSUM_KEY;
7394         key.offset = 0;
7395         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7396         if (ret < 0) {
7397                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7398                 btrfs_release_path(&path);
7399                 return ret;
7400         }
7401
7402         if (ret > 0 && path.slots[0])
7403                 path.slots[0]--;
7404         ret = 0;
7405
7406         while (1) {
7407                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7408                         ret = btrfs_next_leaf(root, &path);
7409                         if (ret < 0) {
7410                                 fprintf(stderr, "Error going to next leaf "
7411                                         "%d\n", ret);
7412                                 break;
7413                         }
7414                         if (ret)
7415                                 break;
7416                 }
7417                 leaf = path.nodes[0];
7418
7419                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7420                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7421                         path.slots[0]++;
7422                         continue;
7423                 }
7424
7425                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7426                               csum_size) * root->fs_info->sectorsize;
7427                 if (!check_data_csum)
7428                         goto skip_csum_check;
7429                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7430                 ret = check_extent_csums(root, key.offset, data_len,
7431                                          leaf_offset, leaf);
7432                 if (ret)
7433                         break;
7434 skip_csum_check:
7435                 if (!num_bytes) {
7436                         offset = key.offset;
7437                 } else if (key.offset != offset + num_bytes) {
7438                         ret = check_extent_exists(root, offset, num_bytes);
7439                         if (ret) {
7440                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7441                                         "there is no extent record\n",
7442                                         offset, offset+num_bytes);
7443                                 errors++;
7444                         }
7445                         offset = key.offset;
7446                         num_bytes = 0;
7447                 }
7448                 num_bytes += data_len;
7449                 path.slots[0]++;
7450         }
7451
7452         btrfs_release_path(&path);
7453         return errors;
7454 }
7455
7456 static int is_dropped_key(struct btrfs_key *key,
7457                           struct btrfs_key *drop_key) {
7458         if (key->objectid < drop_key->objectid)
7459                 return 1;
7460         else if (key->objectid == drop_key->objectid) {
7461                 if (key->type < drop_key->type)
7462                         return 1;
7463                 else if (key->type == drop_key->type) {
7464                         if (key->offset < drop_key->offset)
7465                                 return 1;
7466                 }
7467         }
7468         return 0;
7469 }
7470
7471 /*
7472  * Here are the rules for FULL_BACKREF.
7473  *
7474  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7475  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7476  *      FULL_BACKREF set.
7477  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7478  *    if it happened after the relocation occurred since we'll have dropped the
7479  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7480  *    have no real way to know for sure.
7481  *
7482  * We process the blocks one root at a time, and we start from the lowest root
7483  * objectid and go to the highest.  So we can just lookup the owner backref for
7484  * the record and if we don't find it then we know it doesn't exist and we have
7485  * a FULL BACKREF.
7486  *
7487  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7488  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7489  * be set or not and then we can check later once we've gathered all the refs.
7490  */
7491 static int calc_extent_flag(struct cache_tree *extent_cache,
7492                            struct extent_buffer *buf,
7493                            struct root_item_record *ri,
7494                            u64 *flags)
7495 {
7496         struct extent_record *rec;
7497         struct cache_extent *cache;
7498         struct tree_backref *tback;
7499         u64 owner = 0;
7500
7501         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7502         /* we have added this extent before */
7503         if (!cache)
7504                 return -ENOENT;
7505
7506         rec = container_of(cache, struct extent_record, cache);
7507
7508         /*
7509          * Except file/reloc tree, we can not have
7510          * FULL BACKREF MODE
7511          */
7512         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7513                 goto normal;
7514         /*
7515          * root node
7516          */
7517         if (buf->start == ri->bytenr)
7518                 goto normal;
7519
7520         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7521                 goto full_backref;
7522
7523         owner = btrfs_header_owner(buf);
7524         if (owner == ri->objectid)
7525                 goto normal;
7526
7527         tback = find_tree_backref(rec, 0, owner);
7528         if (!tback)
7529                 goto full_backref;
7530 normal:
7531         *flags = 0;
7532         if (rec->flag_block_full_backref != FLAG_UNSET &&
7533             rec->flag_block_full_backref != 0)
7534                 rec->bad_full_backref = 1;
7535         return 0;
7536 full_backref:
7537         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7538         if (rec->flag_block_full_backref != FLAG_UNSET &&
7539             rec->flag_block_full_backref != 1)
7540                 rec->bad_full_backref = 1;
7541         return 0;
7542 }
7543
7544 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7545 {
7546         fprintf(stderr, "Invalid key type(");
7547         print_key_type(stderr, 0, key_type);
7548         fprintf(stderr, ") found in root(");
7549         print_objectid(stderr, rootid, 0);
7550         fprintf(stderr, ")\n");
7551 }
7552
7553 /*
7554  * Check if the key is valid with its extent buffer.
7555  *
7556  * This is a early check in case invalid key exists in a extent buffer
7557  * This is not comprehensive yet, but should prevent wrong key/item passed
7558  * further
7559  */
7560 static int check_type_with_root(u64 rootid, u8 key_type)
7561 {
7562         switch (key_type) {
7563         /* Only valid in chunk tree */
7564         case BTRFS_DEV_ITEM_KEY:
7565         case BTRFS_CHUNK_ITEM_KEY:
7566                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7567                         goto err;
7568                 break;
7569         /* valid in csum and log tree */
7570         case BTRFS_CSUM_TREE_OBJECTID:
7571                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7572                       is_fstree(rootid)))
7573                         goto err;
7574                 break;
7575         case BTRFS_EXTENT_ITEM_KEY:
7576         case BTRFS_METADATA_ITEM_KEY:
7577         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7578                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7579                         goto err;
7580                 break;
7581         case BTRFS_ROOT_ITEM_KEY:
7582                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7583                         goto err;
7584                 break;
7585         case BTRFS_DEV_EXTENT_KEY:
7586                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7587                         goto err;
7588                 break;
7589         }
7590         return 0;
7591 err:
7592         report_mismatch_key_root(key_type, rootid);
7593         return -EINVAL;
7594 }
7595
/*
 * Process one queued tree block: read it, validate it with check_block() and
 * record every extent, backref, chunk, device, block-group and device-extent
 * item it contains.
 *
 * pick_next_pending() fills bits[] (at most bits_nr entries); bits[0] is the
 * block handled by this call and its start is stored in *last.  The block is
 * dropped from the pending, reada and nodes caches before it is read.
 *
 * Leaves are walked item by item and dispatched on key type.  For interior
 * nodes an extent record plus a tree backref is added for each child pointer
 * and the child is queued on 'nodes' (level > 1) or on 'pending' (otherwise).
 *
 * ri may be NULL; when set, its objectid, last_snapshot, drop_level and
 * drop_key are consulted.
 *
 * Returns 1 when pick_next_pending() reports nothing left to do.  Otherwise
 * returns ret as last assigned; a check_block() or add_extent_rec() failure
 * jumps straight to the exit path.
 *
 * NOTE(review): a logged add_tree_backref() failure leaves its negative value
 * in ret, so it can become the return value if nothing overwrites it later -
 * confirm this is intended.
 */
7596 static int run_next_block(struct btrfs_root *root,
7597                           struct block_info *bits,
7598                           int bits_nr,
7599                           u64 *last,
7600                           struct cache_tree *pending,
7601                           struct cache_tree *seen,
7602                           struct cache_tree *reada,
7603                           struct cache_tree *nodes,
7604                           struct cache_tree *extent_cache,
7605                           struct cache_tree *chunk_cache,
7606                           struct rb_root *dev_cache,
7607                           struct block_group_tree *block_group_cache,
7608                           struct device_extent_tree *dev_extent_cache,
7609                           struct root_item_record *ri)
7610 {
7611         struct extent_buffer *buf;
7612         struct extent_record *rec = NULL;
7613         u64 bytenr;
7614         u32 size;
7615         u64 parent;
7616         u64 owner;
7617         u64 flags;
7618         u64 ptr;
7619         u64 gen = 0;
7620         int ret = 0;
7621         int i;
7622         int nritems;
7623         struct btrfs_key key;
7624         struct cache_extent *cache;
7625         int reada_bits;
7626
7627         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7628                                     bits_nr, &reada_bits);
7629         if (nritems == 0)
7630                 return 1;
7631
             /*
              * reada_bits == 0: add each picked block to the reada cache and
              * issue readahead for it, skipping blocks already present there.
              */
7632         if (!reada_bits) {
7633                 for(i = 0; i < nritems; i++) {
7634                         ret = add_cache_extent(reada, bits[i].start,
7635                                                bits[i].size);
7636                         if (ret == -EEXIST)
7637                                 continue;
7638
7639                         /* fixme, get the parent transid */
7640                         readahead_tree_block(root, bits[i].start,
7641                                              bits[i].size, 0);
7642                 }
7643         }
             /* Only bits[0] is processed by this call. */
7644         *last = bits[0].start;
7645         bytenr = bits[0].start;
7646         size = bits[0].size;
7647
             /* The block is being handled now: drop it from every work queue. */
7648         cache = lookup_cache_extent(pending, bytenr, size);
7649         if (cache) {
7650                 remove_cache_extent(pending, cache);
7651                 free(cache);
7652         }
7653         cache = lookup_cache_extent(reada, bytenr, size);
7654         if (cache) {
7655                 remove_cache_extent(reada, cache);
7656                 free(cache);
7657         }
7658         cache = lookup_cache_extent(nodes, bytenr, size);
7659         if (cache) {
7660                 remove_cache_extent(nodes, cache);
7661                 free(cache);
7662         }
             /*
              * Pick up the extent record for this block, if one is cached, to
              * get the generation recorded from its parent's node pointer.
              *
              * NOTE(review): rec stays NULL when the lookup misses, yet it is
              * dereferenced unconditionally further down - presumably every
              * queued block already has a record; confirm.
              */
7663         cache = lookup_cache_extent(extent_cache, bytenr, size);
7664         if (cache) {
7665                 rec = container_of(cache, struct extent_record, cache);
7666                 gen = rec->parent_generation;
7667         }
7668
7669         /* fixme, get the real parent transid */
7670         buf = read_tree_block(root->fs_info, bytenr, size, gen);
7671         if (!extent_buffer_uptodate(buf)) {
7672                 record_bad_block_io(root->fs_info,
7673                                     extent_cache, bytenr, size);
7674                 goto out;
7675         }
7676
7677         nritems = btrfs_header_nritems(buf);
7678
             /*
              * Work out the block's backref flags: ask the extent tree unless
              * init_extent_tree is set, and fall back to calc_extent_flag().
              * If that fails too, FULL_BACKREF is assumed.
              */
7679         flags = 0;
7680         if (!init_extent_tree) {
7681                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7682                                        btrfs_header_level(buf), 1, NULL,
7683                                        &flags);
7684                 if (ret < 0) {
7685                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7686                         if (ret < 0) {
7687                                 fprintf(stderr, "Couldn't calc extent flags\n");
7688                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7689                         }
7690                 }
7691         } else {
7692                 flags = 0;
7693                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7694                 if (ret < 0) {
7695                         fprintf(stderr, "Couldn't calc extent flags\n");
7696                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7697                 }
7698         }
7699
             /*
              * Cross-check FULL_BACKREF against the root we came from; on a
              * mismatch the flag is flipped and the record is marked
              * bad_full_backref.
              */
7700         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7701                 if (ri != NULL &&
7702                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7703                     ri->objectid == btrfs_header_owner(buf)) {
7704                         /*
7705                          * Ok we got to this block from it's original owner and
7706                          * we have FULL_BACKREF set.  Relocation can leave
7707                          * converted blocks over so this is altogether possible,
7708                          * however it's not possible if the generation > the
7709                          * last snapshot, so check for this case.
7710                          */
7711                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7712                             btrfs_header_generation(buf) > ri->last_snapshot) {
7713                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7714                                 rec->bad_full_backref = 1;
7715                         }
7716                 }
7717         } else {
7718                 if (ri != NULL &&
7719                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7720                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7721                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7722                         rec->bad_full_backref = 1;
7723                 }
7724         }
7725
             /*
              * Backrefs added below are keyed by parent block (owner 0) for
              * full backrefs, and by the header owner (parent 0) otherwise.
              */
7726         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7727                 rec->flag_block_full_backref = 1;
7728                 parent = bytenr;
7729                 owner = 0;
7730         } else {
7731                 rec->flag_block_full_backref = 0;
7732                 parent = 0;
7733                 owner = btrfs_header_owner(buf);
7734         }
7735
7736         ret = check_block(root, extent_cache, buf, flags);
7737         if (ret)
7738                 goto out;
7739
             /* Leaf: account free space, then handle each item by key type. */
7740         if (btrfs_is_leaf(buf)) {
7741                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7742                 for (i = 0; i < nritems; i++) {
7743                         struct btrfs_file_extent_item *fi;
7744                         btrfs_item_key_to_cpu(buf, &key, i);
7745                         /*
7746                          * Check key type against the leaf owner.
7747                          * Could filter quite a lot of early error if
7748                          * owner is correct
7749                          */
7750                         if (check_type_with_root(btrfs_header_owner(buf),
7751                                                  key.type)) {
7752                                 fprintf(stderr, "ignoring invalid key\n");
7753                                 continue;
7754                         }
7755                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7756                                 process_extent_item(root, extent_cache, buf,
7757                                                     i);
7758                                 continue;
7759                         }
7760                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7761                                 process_extent_item(root, extent_cache, buf,
7762                                                     i);
7763                                 continue;
7764                         }
7765                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7766                                 total_csum_bytes +=
7767                                         btrfs_item_size_nr(buf, i);
7768                                 continue;
7769                         }
7770                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7771                                 process_chunk_item(chunk_cache, &key, buf, i);
7772                                 continue;
7773                         }
7774                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7775                                 process_device_item(dev_cache, &key, buf, i);
7776                                 continue;
7777                         }
7778                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7779                                 process_block_group_item(block_group_cache,
7780                                         &key, buf, i);
7781                                 continue;
7782                         }
7783                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7784                                 process_device_extent_item(dev_extent_cache,
7785                                         &key, buf, i);
7786                                 continue;
7787
7788                         }
7789                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7790 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7791                                 process_extent_ref_v0(extent_cache, buf, i);
7792 #else
7793                                 BUG();
7794 #endif
7795                                 continue;
7796                         }
7797
                             /*
                              * Keyed backref items: key.objectid is the extent
                              * start; key.offset is passed as the root for
                              * TREE_BLOCK_REF and as the parent for the SHARED_*
                              * variants.
                              */
7798                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7799                                 ret = add_tree_backref(extent_cache,
7800                                                 key.objectid, 0, key.offset, 0);
7801                                 if (ret < 0)
7802                                         error(
7803                                 "add_tree_backref failed (leaf tree block): %s",
7804                                               strerror(-ret));
7805                                 continue;
7806                         }
7807                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7808                                 ret = add_tree_backref(extent_cache,
7809                                                 key.objectid, key.offset, 0, 0);
7810                                 if (ret < 0)
7811                                         error(
7812                                 "add_tree_backref failed (leaf shared block): %s",
7813                                               strerror(-ret));
7814                                 continue;
7815                         }
7816                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7817                                 struct btrfs_extent_data_ref *ref;
7818                                 ref = btrfs_item_ptr(buf, i,
7819                                                 struct btrfs_extent_data_ref);
7820                                 add_data_backref(extent_cache,
7821                                         key.objectid, 0,
7822                                         btrfs_extent_data_ref_root(buf, ref),
7823                                         btrfs_extent_data_ref_objectid(buf,
7824                                                                        ref),
7825                                         btrfs_extent_data_ref_offset(buf, ref),
7826                                         btrfs_extent_data_ref_count(buf, ref),
7827                                         0, root->fs_info->sectorsize);
7828                                 continue;
7829                         }
7830                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7831                                 struct btrfs_shared_data_ref *ref;
7832                                 ref = btrfs_item_ptr(buf, i,
7833                                                 struct btrfs_shared_data_ref);
7834                                 add_data_backref(extent_cache,
7835                                         key.objectid, key.offset, 0, 0, 0,
7836                                         btrfs_shared_data_ref_count(buf, ref),
7837                                         0, root->fs_info->sectorsize);
7838                                 continue;
7839                         }
                             /*
                              * Orphan items are queued on delete_items, except
                              * those with objectid BTRFS_ORPHAN_OBJECTID or when
                              * owner is 0 (the full-backref case above).  An
                              * allocation failure silently skips the item.
                              */
7840                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7841                                 struct bad_item *bad;
7842
7843                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7844                                         continue;
7845                                 if (!owner)
7846                                         continue;
7847                                 bad = malloc(sizeof(struct bad_item));
7848                                 if (!bad)
7849                                         continue;
7850                                 INIT_LIST_HEAD(&bad->list);
7851                                 memcpy(&bad->key, &key,
7852                                        sizeof(struct btrfs_key));
7853                                 bad->root_id = owner;
7854                                 list_add_tail(&bad->list, &delete_items);
7855                                 continue;
7856                         }
                             /*
                              * Only file extent items are handled from here on;
                              * inline extents and extents whose disk_bytenr is 0
                              * are skipped.
                              */
7857                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7858                                 continue;
7859                         fi = btrfs_item_ptr(buf, i,
7860                                             struct btrfs_file_extent_item);
7861                         if (btrfs_file_extent_type(buf, fi) ==
7862                             BTRFS_FILE_EXTENT_INLINE)
7863                                 continue;
7864                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7865                                 continue;
7866
7867                         data_bytes_allocated +=
7868                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7869                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7870                                 abort();
7871                         }
7872                         data_bytes_referenced +=
7873                                 btrfs_file_extent_num_bytes(buf, fi);
7874                         add_data_backref(extent_cache,
7875                                 btrfs_file_extent_disk_bytenr(buf, fi),
7876                                 parent, owner, key.objectid, key.offset -
7877                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7878                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7879                 }
7880         } else {
                     /*
                      * Node: record an extent and a tree backref for every child
                      * pointer and queue the child for a later call.  first_key
                      * is filled in but not read again in this function.
                      */
7881                 int level;
7882                 struct btrfs_key first_key;
7883
7884                 first_key.objectid = 0;
7885
7886                 if (nritems > 0)
7887                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7888                 level = btrfs_header_level(buf);
7889                 for (i = 0; i < nritems; i++) {
7890                         struct extent_record tmpl;
7891
7892                         ptr = btrfs_node_blockptr(buf, i);
7893                         size = root->fs_info->nodesize;
7894                         btrfs_node_key_to_cpu(buf, &key, i);
                             /*
                              * Skip children for which is_dropped_key() matches
                              * ri->drop_key at ri->drop_level.
                              */
7895                         if (ri != NULL) {
7896                                 if ((level == ri->drop_level)
7897                                     && is_dropped_key(&key, &ri->drop_key)) {
7898                                         continue;
7899                                 }
7900                         }
7901
7902                         memset(&tmpl, 0, sizeof(tmpl));
7903                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7904                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7905                         tmpl.start = ptr;
7906                         tmpl.nr = size;
7907                         tmpl.refs = 1;
7908                         tmpl.metadata = 1;
7909                         tmpl.max_size = size;
7910                         ret = add_extent_rec(extent_cache, &tmpl);
7911                         if (ret < 0)
7912                                 goto out;
7913
7914                         ret = add_tree_backref(extent_cache, ptr, parent,
7915                                         owner, 1);
7916                         if (ret < 0) {
7917                                 error(
7918                                 "add_tree_backref failed (non-leaf block): %s",
7919                                       strerror(-ret));
7920                                 continue;
7921                         }
7922
7923                         if (level > 1) {
7924                                 add_pending(nodes, seen, ptr, size);
7925                         } else {
7926                                 add_pending(pending, seen, ptr, size);
7927                         }
7928                 }
7929                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7930                                       nritems) * sizeof(struct btrfs_key_ptr);
7931         }
             /* Update the global byte counters for this block. */
7932         total_btree_bytes += buf->len;
7933         if (fs_root_objectid(btrfs_header_owner(buf)))
7934                 total_fs_tree_bytes += buf->len;
7935         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7936                 total_extent_tree_bytes += buf->len;
7937         if (!found_old_backref &&
7938             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7939             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7940             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7941                 found_old_backref = 1;
7942 out:
7943         free_extent_buffer(buf);
7944         return ret;
7945 }
7946
7947 static int add_root_to_pending(struct extent_buffer *buf,
7948                                struct cache_tree *extent_cache,
7949                                struct cache_tree *pending,
7950                                struct cache_tree *seen,
7951                                struct cache_tree *nodes,
7952                                u64 objectid)
7953 {
7954         struct extent_record tmpl;
7955         int ret;
7956
7957         if (btrfs_header_level(buf) > 0)
7958                 add_pending(nodes, seen, buf->start, buf->len);
7959         else
7960                 add_pending(pending, seen, buf->start, buf->len);
7961
7962         memset(&tmpl, 0, sizeof(tmpl));
7963         tmpl.start = buf->start;
7964         tmpl.nr = buf->len;
7965         tmpl.is_root = 1;
7966         tmpl.refs = 1;
7967         tmpl.metadata = 1;
7968         tmpl.max_size = buf->len;
7969         add_extent_rec(extent_cache, &tmpl);
7970
7971         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7972             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7973                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7974                                 0, 1);
7975         else
7976                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7977                                 1);
7978         return ret;
7979 }
7980
7981 /* as we fix the tree, we might be deleting blocks that
7982  * we're tracking for repair.  This hook makes sure we
7983  * remove any backrefs for blocks as we are fixing them.
7984  */
7985 static int free_extent_hook(struct btrfs_trans_handle *trans,
7986                             struct btrfs_root *root,
7987                             u64 bytenr, u64 num_bytes, u64 parent,
7988                             u64 root_objectid, u64 owner, u64 offset,
7989                             int refs_to_drop)
7990 {
7991         struct extent_record *rec;
7992         struct cache_extent *cache;
7993         int is_data;
7994         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7995
7996         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7997         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7998         if (!cache)
7999                 return 0;
8000
8001         rec = container_of(cache, struct extent_record, cache);
8002         if (is_data) {
8003                 struct data_backref *back;
8004                 back = find_data_backref(rec, parent, root_objectid, owner,
8005                                          offset, 1, bytenr, num_bytes);
8006                 if (!back)
8007                         goto out;
8008                 if (back->node.found_ref) {
8009                         back->found_ref -= refs_to_drop;
8010                         if (rec->refs)
8011                                 rec->refs -= refs_to_drop;
8012                 }
8013                 if (back->node.found_extent_tree) {
8014                         back->num_refs -= refs_to_drop;
8015                         if (rec->extent_item_refs)
8016                                 rec->extent_item_refs -= refs_to_drop;
8017                 }
8018                 if (back->found_ref == 0)
8019                         back->node.found_ref = 0;
8020                 if (back->num_refs == 0)
8021                         back->node.found_extent_tree = 0;
8022
8023                 if (!back->node.found_extent_tree && back->node.found_ref) {
8024                         list_del(&back->node.list);
8025                         free(back);
8026                 }
8027         } else {
8028                 struct tree_backref *back;
8029                 back = find_tree_backref(rec, parent, root_objectid);
8030                 if (!back)
8031                         goto out;
8032                 if (back->node.found_ref) {
8033                         if (rec->refs)
8034                                 rec->refs--;
8035                         back->node.found_ref = 0;
8036                 }
8037                 if (back->node.found_extent_tree) {
8038                         if (rec->extent_item_refs)
8039                                 rec->extent_item_refs--;
8040                         back->node.found_extent_tree = 0;
8041                 }
8042                 if (!back->node.found_extent_tree && back->node.found_ref) {
8043                         list_del(&back->node.list);
8044                         free(back);
8045                 }
8046         }
8047         maybe_free_extent_rec(extent_cache, rec);
8048 out:
8049         return 0;
8050 }
8051
8052 static int delete_extent_records(struct btrfs_trans_handle *trans,
8053                                  struct btrfs_root *root,
8054                                  struct btrfs_path *path,
8055                                  u64 bytenr)
8056 {
8057         struct btrfs_key key;
8058         struct btrfs_key found_key;
8059         struct extent_buffer *leaf;
8060         int ret;
8061         int slot;
8062
8063
8064         key.objectid = bytenr;
8065         key.type = (u8)-1;
8066         key.offset = (u64)-1;
8067
8068         while(1) {
8069                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8070                                         &key, path, 0, 1);
8071                 if (ret < 0)
8072                         break;
8073
8074                 if (ret > 0) {
8075                         ret = 0;
8076                         if (path->slots[0] == 0)
8077                                 break;
8078                         path->slots[0]--;
8079                 }
8080                 ret = 0;
8081
8082                 leaf = path->nodes[0];
8083                 slot = path->slots[0];
8084
8085                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8086                 if (found_key.objectid != bytenr)
8087                         break;
8088
8089                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8090                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8091                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8092                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8093                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8094                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8095                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8096                         btrfs_release_path(path);
8097                         if (found_key.type == 0) {
8098                                 if (found_key.offset == 0)
8099                                         break;
8100                                 key.offset = found_key.offset - 1;
8101                                 key.type = found_key.type;
8102                         }
8103                         key.type = found_key.type - 1;
8104                         key.offset = (u64)-1;
8105                         continue;
8106                 }
8107
8108                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8109                         found_key.objectid, found_key.type, found_key.offset);
8110
8111                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8112                 if (ret)
8113                         break;
8114                 btrfs_release_path(path);
8115
8116                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8117                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8118                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8119                                 found_key.offset : root->fs_info->nodesize;
8120
8121                         ret = btrfs_update_block_group(trans, root, bytenr,
8122                                                        bytes, 0, 0);
8123                         if (ret)
8124                                 break;
8125                 }
8126         }
8127
8128         btrfs_release_path(path);
8129         return ret;
8130 }
8131
8132 /*
8133  * for a single backref, this will allocate a new extent
8134  * and add the backref to it.
8135  */
8136 static int record_extent(struct btrfs_trans_handle *trans,
8137                          struct btrfs_fs_info *info,
8138                          struct btrfs_path *path,
8139                          struct extent_record *rec,
8140                          struct extent_backref *back,
8141                          int allocated, u64 flags)
8142 {
8143         int ret = 0;
8144         struct btrfs_root *extent_root = info->extent_root;
8145         struct extent_buffer *leaf;
8146         struct btrfs_key ins_key;
8147         struct btrfs_extent_item *ei;
8148         struct data_backref *dback;
8149         struct btrfs_tree_block_info *bi;
8150
8151         if (!back->is_data)
8152                 rec->max_size = max_t(u64, rec->max_size,
8153                                     info->nodesize);
8154
8155         if (!allocated) {
8156                 u32 item_size = sizeof(*ei);
8157
8158                 if (!back->is_data)
8159                         item_size += sizeof(*bi);
8160
8161                 ins_key.objectid = rec->start;
8162                 ins_key.offset = rec->max_size;
8163                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8164
8165                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8166                                         &ins_key, item_size);
8167                 if (ret)
8168                         goto fail;
8169
8170                 leaf = path->nodes[0];
8171                 ei = btrfs_item_ptr(leaf, path->slots[0],
8172                                     struct btrfs_extent_item);
8173
8174                 btrfs_set_extent_refs(leaf, ei, 0);
8175                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8176
8177                 if (back->is_data) {
8178                         btrfs_set_extent_flags(leaf, ei,
8179                                                BTRFS_EXTENT_FLAG_DATA);
8180                 } else {
8181                         struct btrfs_disk_key copy_key;;
8182
8183                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8184                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8185                                              sizeof(*bi));
8186
8187                         btrfs_set_disk_key_objectid(&copy_key,
8188                                                     rec->info_objectid);
8189                         btrfs_set_disk_key_type(&copy_key, 0);
8190                         btrfs_set_disk_key_offset(&copy_key, 0);
8191
8192                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8193                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8194
8195                         btrfs_set_extent_flags(leaf, ei,
8196                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8197                 }
8198
8199                 btrfs_mark_buffer_dirty(leaf);
8200                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8201                                                rec->max_size, 1, 0);
8202                 if (ret)
8203                         goto fail;
8204                 btrfs_release_path(path);
8205         }
8206
8207         if (back->is_data) {
8208                 u64 parent;
8209                 int i;
8210
8211                 dback = to_data_backref(back);
8212                 if (back->full_backref)
8213                         parent = dback->parent;
8214                 else
8215                         parent = 0;
8216
8217                 for (i = 0; i < dback->found_ref; i++) {
8218                         /* if parent != 0, we're doing a full backref
8219                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8220                          * just makes the backref allocator create a data
8221                          * backref
8222                          */
8223                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8224                                                    rec->start, rec->max_size,
8225                                                    parent,
8226                                                    dback->root,
8227                                                    parent ?
8228                                                    BTRFS_FIRST_FREE_OBJECTID :
8229                                                    dback->owner,
8230                                                    dback->offset);
8231                         if (ret)
8232                                 break;
8233                 }
8234                 fprintf(stderr, "adding new data backref"
8235                                 " on %llu %s %llu owner %llu"
8236                                 " offset %llu found %d\n",
8237                                 (unsigned long long)rec->start,
8238                                 back->full_backref ?
8239                                 "parent" : "root",
8240                                 back->full_backref ?
8241                                 (unsigned long long)parent :
8242                                 (unsigned long long)dback->root,
8243                                 (unsigned long long)dback->owner,
8244                                 (unsigned long long)dback->offset,
8245                                 dback->found_ref);
8246         } else {
8247                 u64 parent;
8248                 struct tree_backref *tback;
8249
8250                 tback = to_tree_backref(back);
8251                 if (back->full_backref)
8252                         parent = tback->parent;
8253                 else
8254                         parent = 0;
8255
8256                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8257                                            rec->start, rec->max_size,
8258                                            parent, tback->root, 0, 0);
8259                 fprintf(stderr, "adding new tree backref on "
8260                         "start %llu len %llu parent %llu root %llu\n",
8261                         rec->start, rec->max_size, parent, tback->root);
8262         }
8263 fail:
8264         btrfs_release_path(path);
8265         return ret;
8266 }
8267
8268 static struct extent_entry *find_entry(struct list_head *entries,
8269                                        u64 bytenr, u64 bytes)
8270 {
8271         struct extent_entry *entry = NULL;
8272
8273         list_for_each_entry(entry, entries, list) {
8274                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8275                         return entry;
8276         }
8277
8278         return NULL;
8279 }
8280
8281 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8282 {
8283         struct extent_entry *entry, *best = NULL, *prev = NULL;
8284
8285         list_for_each_entry(entry, entries, list) {
8286                 /*
8287                  * If there are as many broken entries as entries then we know
8288                  * not to trust this particular entry.
8289                  */
8290                 if (entry->broken == entry->count)
8291                         continue;
8292
8293                 /*
8294                  * Special case, when there are only two entries and 'best' is
8295                  * the first one
8296                  */
8297                 if (!prev) {
8298                         best = entry;
8299                         prev = entry;
8300                         continue;
8301                 }
8302
8303                 /*
8304                  * If our current entry == best then we can't be sure our best
8305                  * is really the best, so we need to keep searching.
8306                  */
8307                 if (best && best->count == entry->count) {
8308                         prev = entry;
8309                         best = NULL;
8310                         continue;
8311                 }
8312
8313                 /* Prev == entry, not good enough, have to keep searching */
8314                 if (!prev->broken && prev->count == entry->count)
8315                         continue;
8316
8317                 if (!best)
8318                         best = (prev->count > entry->count) ? prev : entry;
8319                 else if (best->count < entry->count)
8320                         best = entry;
8321                 prev = entry;
8322         }
8323
8324         return best;
8325 }
8326
8327 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8328                       struct data_backref *dback, struct extent_entry *entry)
8329 {
8330         struct btrfs_trans_handle *trans;
8331         struct btrfs_root *root;
8332         struct btrfs_file_extent_item *fi;
8333         struct extent_buffer *leaf;
8334         struct btrfs_key key;
8335         u64 bytenr, bytes;
8336         int ret, err;
8337
8338         key.objectid = dback->root;
8339         key.type = BTRFS_ROOT_ITEM_KEY;
8340         key.offset = (u64)-1;
8341         root = btrfs_read_fs_root(info, &key);
8342         if (IS_ERR(root)) {
8343                 fprintf(stderr, "Couldn't find root for our ref\n");
8344                 return -EINVAL;
8345         }
8346
8347         /*
8348          * The backref points to the original offset of the extent if it was
8349          * split, so we need to search down to the offset we have and then walk
8350          * forward until we find the backref we're looking for.
8351          */
8352         key.objectid = dback->owner;
8353         key.type = BTRFS_EXTENT_DATA_KEY;
8354         key.offset = dback->offset;
8355         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8356         if (ret < 0) {
8357                 fprintf(stderr, "Error looking up ref %d\n", ret);
8358                 return ret;
8359         }
8360
8361         while (1) {
8362                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8363                         ret = btrfs_next_leaf(root, path);
8364                         if (ret) {
8365                                 fprintf(stderr, "Couldn't find our ref, next\n");
8366                                 return -EINVAL;
8367                         }
8368                 }
8369                 leaf = path->nodes[0];
8370                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8371                 if (key.objectid != dback->owner ||
8372                     key.type != BTRFS_EXTENT_DATA_KEY) {
8373                         fprintf(stderr, "Couldn't find our ref, search\n");
8374                         return -EINVAL;
8375                 }
8376                 fi = btrfs_item_ptr(leaf, path->slots[0],
8377                                     struct btrfs_file_extent_item);
8378                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8379                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8380
8381                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8382                         break;
8383                 path->slots[0]++;
8384         }
8385
8386         btrfs_release_path(path);
8387
8388         trans = btrfs_start_transaction(root, 1);
8389         if (IS_ERR(trans))
8390                 return PTR_ERR(trans);
8391
8392         /*
8393          * Ok we have the key of the file extent we want to fix, now we can cow
8394          * down to the thing and fix it.
8395          */
8396         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8397         if (ret < 0) {
8398                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8399                         key.objectid, key.type, key.offset, ret);
8400                 goto out;
8401         }
8402         if (ret > 0) {
8403                 fprintf(stderr, "Well that's odd, we just found this key "
8404                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8405                         key.offset);
8406                 ret = -EINVAL;
8407                 goto out;
8408         }
8409         leaf = path->nodes[0];
8410         fi = btrfs_item_ptr(leaf, path->slots[0],
8411                             struct btrfs_file_extent_item);
8412
8413         if (btrfs_file_extent_compression(leaf, fi) &&
8414             dback->disk_bytenr != entry->bytenr) {
8415                 fprintf(stderr, "Ref doesn't match the record start and is "
8416                         "compressed, please take a btrfs-image of this file "
8417                         "system and send it to a btrfs developer so they can "
8418                         "complete this functionality for bytenr %Lu\n",
8419                         dback->disk_bytenr);
8420                 ret = -EINVAL;
8421                 goto out;
8422         }
8423
8424         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8425                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8426         } else if (dback->disk_bytenr > entry->bytenr) {
8427                 u64 off_diff, offset;
8428
8429                 off_diff = dback->disk_bytenr - entry->bytenr;
8430                 offset = btrfs_file_extent_offset(leaf, fi);
8431                 if (dback->disk_bytenr + offset +
8432                     btrfs_file_extent_num_bytes(leaf, fi) >
8433                     entry->bytenr + entry->bytes) {
8434                         fprintf(stderr, "Ref is past the entry end, please "
8435                                 "take a btrfs-image of this file system and "
8436                                 "send it to a btrfs developer, ref %Lu\n",
8437                                 dback->disk_bytenr);
8438                         ret = -EINVAL;
8439                         goto out;
8440                 }
8441                 offset += off_diff;
8442                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8443                 btrfs_set_file_extent_offset(leaf, fi, offset);
8444         } else if (dback->disk_bytenr < entry->bytenr) {
8445                 u64 offset;
8446
8447                 offset = btrfs_file_extent_offset(leaf, fi);
8448                 if (dback->disk_bytenr + offset < entry->bytenr) {
8449                         fprintf(stderr, "Ref is before the entry start, please"
8450                                 " take a btrfs-image of this file system and "
8451                                 "send it to a btrfs developer, ref %Lu\n",
8452                                 dback->disk_bytenr);
8453                         ret = -EINVAL;
8454                         goto out;
8455                 }
8456
8457                 offset += dback->disk_bytenr;
8458                 offset -= entry->bytenr;
8459                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8460                 btrfs_set_file_extent_offset(leaf, fi, offset);
8461         }
8462
8463         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8464
8465         /*
8466          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8467          * only do this if we aren't using compression, otherwise it's a
8468          * trickier case.
8469          */
8470         if (!btrfs_file_extent_compression(leaf, fi))
8471                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8472         else
8473                 printf("ram bytes may be wrong?\n");
8474         btrfs_mark_buffer_dirty(leaf);
8475 out:
8476         err = btrfs_commit_transaction(trans, root);
8477         btrfs_release_path(path);
8478         return ret ? ret : err;
8479 }
8480
8481 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8482                            struct extent_record *rec)
8483 {
8484         struct extent_backref *back;
8485         struct data_backref *dback;
8486         struct extent_entry *entry, *best = NULL;
8487         LIST_HEAD(entries);
8488         int nr_entries = 0;
8489         int broken_entries = 0;
8490         int ret = 0;
8491         short mismatch = 0;
8492
8493         /*
8494          * Metadata is easy and the backrefs should always agree on bytenr and
8495          * size, if not we've got bigger issues.
8496          */
8497         if (rec->metadata)
8498                 return 0;
8499
8500         list_for_each_entry(back, &rec->backrefs, list) {
8501                 if (back->full_backref || !back->is_data)
8502                         continue;
8503
8504                 dback = to_data_backref(back);
8505
8506                 /*
8507                  * We only pay attention to backrefs that we found a real
8508                  * backref for.
8509                  */
8510                 if (dback->found_ref == 0)
8511                         continue;
8512
8513                 /*
8514                  * For now we only catch when the bytes don't match, not the
8515                  * bytenr.  We can easily do this at the same time, but I want
8516                  * to have a fs image to test on before we just add repair
8517                  * functionality willy-nilly so we know we won't screw up the
8518                  * repair.
8519                  */
8520
8521                 entry = find_entry(&entries, dback->disk_bytenr,
8522                                    dback->bytes);
8523                 if (!entry) {
8524                         entry = malloc(sizeof(struct extent_entry));
8525                         if (!entry) {
8526                                 ret = -ENOMEM;
8527                                 goto out;
8528                         }
8529                         memset(entry, 0, sizeof(*entry));
8530                         entry->bytenr = dback->disk_bytenr;
8531                         entry->bytes = dback->bytes;
8532                         list_add_tail(&entry->list, &entries);
8533                         nr_entries++;
8534                 }
8535
8536                 /*
8537                  * If we only have on entry we may think the entries agree when
8538                  * in reality they don't so we have to do some extra checking.
8539                  */
8540                 if (dback->disk_bytenr != rec->start ||
8541                     dback->bytes != rec->nr || back->broken)
8542                         mismatch = 1;
8543
8544                 if (back->broken) {
8545                         entry->broken++;
8546                         broken_entries++;
8547                 }
8548
8549                 entry->count++;
8550         }
8551
8552         /* Yay all the backrefs agree, carry on good sir */
8553         if (nr_entries <= 1 && !mismatch)
8554                 goto out;
8555
8556         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8557                 "%Lu\n", rec->start);
8558
8559         /*
8560          * First we want to see if the backrefs can agree amongst themselves who
8561          * is right, so figure out which one of the entries has the highest
8562          * count.
8563          */
8564         best = find_most_right_entry(&entries);
8565
8566         /*
8567          * Ok so we may have an even split between what the backrefs think, so
8568          * this is where we use the extent ref to see what it thinks.
8569          */
8570         if (!best) {
8571                 entry = find_entry(&entries, rec->start, rec->nr);
8572                 if (!entry && (!broken_entries || !rec->found_rec)) {
8573                         fprintf(stderr, "Backrefs don't agree with each other "
8574                                 "and extent record doesn't agree with anybody,"
8575                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8576                                 rec->start, rec->nr);
8577                         ret = -EINVAL;
8578                         goto out;
8579                 } else if (!entry) {
8580                         /*
8581                          * Ok our backrefs were broken, we'll assume this is the
8582                          * correct value and add an entry for this range.
8583                          */
8584                         entry = malloc(sizeof(struct extent_entry));
8585                         if (!entry) {
8586                                 ret = -ENOMEM;
8587                                 goto out;
8588                         }
8589                         memset(entry, 0, sizeof(*entry));
8590                         entry->bytenr = rec->start;
8591                         entry->bytes = rec->nr;
8592                         list_add_tail(&entry->list, &entries);
8593                         nr_entries++;
8594                 }
8595                 entry->count++;
8596                 best = find_most_right_entry(&entries);
8597                 if (!best) {
8598                         fprintf(stderr, "Backrefs and extent record evenly "
8599                                 "split on who is right, this is going to "
8600                                 "require user input to fix bytenr %Lu bytes "
8601                                 "%Lu\n", rec->start, rec->nr);
8602                         ret = -EINVAL;
8603                         goto out;
8604                 }
8605         }
8606
8607         /*
8608          * I don't think this can happen currently as we'll abort() if we catch
8609          * this case higher up, but in case somebody removes that we still can't
8610          * deal with it properly here yet, so just bail out of that's the case.
8611          */
8612         if (best->bytenr != rec->start) {
8613                 fprintf(stderr, "Extent start and backref starts don't match, "
8614                         "please use btrfs-image on this file system and send "
8615                         "it to a btrfs developer so they can make fsck fix "
8616                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8617                         rec->start, rec->nr);
8618                 ret = -EINVAL;
8619                 goto out;
8620         }
8621
8622         /*
8623          * Ok great we all agreed on an extent record, let's go find the real
8624          * references and fix up the ones that don't match.
8625          */
8626         list_for_each_entry(back, &rec->backrefs, list) {
8627                 if (back->full_backref || !back->is_data)
8628                         continue;
8629
8630                 dback = to_data_backref(back);
8631
8632                 /*
8633                  * Still ignoring backrefs that don't have a real ref attached
8634                  * to them.
8635                  */
8636                 if (dback->found_ref == 0)
8637                         continue;
8638
8639                 if (dback->bytes == best->bytes &&
8640                     dback->disk_bytenr == best->bytenr)
8641                         continue;
8642
8643                 ret = repair_ref(info, path, dback, best);
8644                 if (ret)
8645                         goto out;
8646         }
8647
8648         /*
8649          * Ok we messed with the actual refs, which means we need to drop our
8650          * entire cache and go back and rescan.  I know this is a huge pain and
8651          * adds a lot of extra work, but it's the only way to be safe.  Once all
8652          * the backrefs agree we may not need to do anything to the extent
8653          * record itself.
8654          */
8655         ret = -EAGAIN;
8656 out:
8657         while (!list_empty(&entries)) {
8658                 entry = list_entry(entries.next, struct extent_entry, list);
8659                 list_del_init(&entry->list);
8660                 free(entry);
8661         }
8662         return ret;
8663 }
8664
8665 static int process_duplicates(struct cache_tree *extent_cache,
8666                               struct extent_record *rec)
8667 {
8668         struct extent_record *good, *tmp;
8669         struct cache_extent *cache;
8670         int ret;
8671
8672         /*
8673          * If we found a extent record for this extent then return, or if we
8674          * have more than one duplicate we are likely going to need to delete
8675          * something.
8676          */
8677         if (rec->found_rec || rec->num_duplicates > 1)
8678                 return 0;
8679
8680         /* Shouldn't happen but just in case */
8681         BUG_ON(!rec->num_duplicates);
8682
8683         /*
8684          * So this happens if we end up with a backref that doesn't match the
8685          * actual extent entry.  So either the backref is bad or the extent
8686          * entry is bad.  Either way we want to have the extent_record actually
8687          * reflect what we found in the extent_tree, so we need to take the
8688          * duplicate out and use that as the extent_record since the only way we
8689          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8690          */
8691         remove_cache_extent(extent_cache, &rec->cache);
8692
8693         good = to_extent_record(rec->dups.next);
8694         list_del_init(&good->list);
8695         INIT_LIST_HEAD(&good->backrefs);
8696         INIT_LIST_HEAD(&good->dups);
8697         good->cache.start = good->start;
8698         good->cache.size = good->nr;
8699         good->content_checked = 0;
8700         good->owner_ref_checked = 0;
8701         good->num_duplicates = 0;
8702         good->refs = rec->refs;
8703         list_splice_init(&rec->backrefs, &good->backrefs);
8704         while (1) {
8705                 cache = lookup_cache_extent(extent_cache, good->start,
8706                                             good->nr);
8707                 if (!cache)
8708                         break;
8709                 tmp = container_of(cache, struct extent_record, cache);
8710
8711                 /*
8712                  * If we find another overlapping extent and it's found_rec is
8713                  * set then it's a duplicate and we need to try and delete
8714                  * something.
8715                  */
8716                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8717                         if (list_empty(&good->list))
8718                                 list_add_tail(&good->list,
8719                                               &duplicate_extents);
8720                         good->num_duplicates += tmp->num_duplicates + 1;
8721                         list_splice_init(&tmp->dups, &good->dups);
8722                         list_del_init(&tmp->list);
8723                         list_add_tail(&tmp->list, &good->dups);
8724                         remove_cache_extent(extent_cache, &tmp->cache);
8725                         continue;
8726                 }
8727
8728                 /*
8729                  * Ok we have another non extent item backed extent rec, so lets
8730                  * just add it to this extent and carry on like we did above.
8731                  */
8732                 good->refs += tmp->refs;
8733                 list_splice_init(&tmp->backrefs, &good->backrefs);
8734                 remove_cache_extent(extent_cache, &tmp->cache);
8735                 free(tmp);
8736         }
8737         ret = insert_cache_extent(extent_cache, &good->cache);
8738         BUG_ON(ret);
8739         free(rec);
8740         return good->num_duplicates ? 0 : 1;
8741 }
8742
8743 static int delete_duplicate_records(struct btrfs_root *root,
8744                                     struct extent_record *rec)
8745 {
8746         struct btrfs_trans_handle *trans;
8747         LIST_HEAD(delete_list);
8748         struct btrfs_path path;
8749         struct extent_record *tmp, *good, *n;
8750         int nr_del = 0;
8751         int ret = 0, err;
8752         struct btrfs_key key;
8753
8754         btrfs_init_path(&path);
8755
8756         good = rec;
8757         /* Find the record that covers all of the duplicates. */
8758         list_for_each_entry(tmp, &rec->dups, list) {
8759                 if (good->start < tmp->start)
8760                         continue;
8761                 if (good->nr > tmp->nr)
8762                         continue;
8763
8764                 if (tmp->start + tmp->nr < good->start + good->nr) {
8765                         fprintf(stderr, "Ok we have overlapping extents that "
8766                                 "aren't completely covered by each other, this "
8767                                 "is going to require more careful thought.  "
8768                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8769                                 tmp->start, tmp->nr, good->start, good->nr);
8770                         abort();
8771                 }
8772                 good = tmp;
8773         }
8774
8775         if (good != rec)
8776                 list_add_tail(&rec->list, &delete_list);
8777
8778         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8779                 if (tmp == good)
8780                         continue;
8781                 list_move_tail(&tmp->list, &delete_list);
8782         }
8783
8784         root = root->fs_info->extent_root;
8785         trans = btrfs_start_transaction(root, 1);
8786         if (IS_ERR(trans)) {
8787                 ret = PTR_ERR(trans);
8788                 goto out;
8789         }
8790
8791         list_for_each_entry(tmp, &delete_list, list) {
8792                 if (tmp->found_rec == 0)
8793                         continue;
8794                 key.objectid = tmp->start;
8795                 key.type = BTRFS_EXTENT_ITEM_KEY;
8796                 key.offset = tmp->nr;
8797
8798                 /* Shouldn't happen but just in case */
8799                 if (tmp->metadata) {
8800                         fprintf(stderr, "Well this shouldn't happen, extent "
8801                                 "record overlaps but is metadata? "
8802                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8803                         abort();
8804                 }
8805
8806                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8807                 if (ret) {
8808                         if (ret > 0)
8809                                 ret = -EINVAL;
8810                         break;
8811                 }
8812                 ret = btrfs_del_item(trans, root, &path);
8813                 if (ret)
8814                         break;
8815                 btrfs_release_path(&path);
8816                 nr_del++;
8817         }
8818         err = btrfs_commit_transaction(trans, root);
8819         if (err && !ret)
8820                 ret = err;
8821 out:
8822         while (!list_empty(&delete_list)) {
8823                 tmp = to_extent_record(delete_list.next);
8824                 list_del_init(&tmp->list);
8825                 if (tmp == rec)
8826                         continue;
8827                 free(tmp);
8828         }
8829
8830         while (!list_empty(&rec->dups)) {
8831                 tmp = to_extent_record(rec->dups.next);
8832                 list_del_init(&tmp->list);
8833                 free(tmp);
8834         }
8835
8836         btrfs_release_path(&path);
8837
8838         if (!ret && !nr_del)
8839                 rec->num_duplicates = 0;
8840
8841         return ret ? ret : nr_del;
8842 }
8843
8844 static int find_possible_backrefs(struct btrfs_fs_info *info,
8845                                   struct btrfs_path *path,
8846                                   struct cache_tree *extent_cache,
8847                                   struct extent_record *rec)
8848 {
8849         struct btrfs_root *root;
8850         struct extent_backref *back;
8851         struct data_backref *dback;
8852         struct cache_extent *cache;
8853         struct btrfs_file_extent_item *fi;
8854         struct btrfs_key key;
8855         u64 bytenr, bytes;
8856         int ret;
8857
8858         list_for_each_entry(back, &rec->backrefs, list) {
8859                 /* Don't care about full backrefs (poor unloved backrefs) */
8860                 if (back->full_backref || !back->is_data)
8861                         continue;
8862
8863                 dback = to_data_backref(back);
8864
8865                 /* We found this one, we don't need to do a lookup */
8866                 if (dback->found_ref)
8867                         continue;
8868
8869                 key.objectid = dback->root;
8870                 key.type = BTRFS_ROOT_ITEM_KEY;
8871                 key.offset = (u64)-1;
8872
8873                 root = btrfs_read_fs_root(info, &key);
8874
8875                 /* No root, definitely a bad ref, skip */
8876                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8877                         continue;
8878                 /* Other err, exit */
8879                 if (IS_ERR(root))
8880                         return PTR_ERR(root);
8881
8882                 key.objectid = dback->owner;
8883                 key.type = BTRFS_EXTENT_DATA_KEY;
8884                 key.offset = dback->offset;
8885                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8886                 if (ret) {
8887                         btrfs_release_path(path);
8888                         if (ret < 0)
8889                                 return ret;
8890                         /* Didn't find it, we can carry on */
8891                         ret = 0;
8892                         continue;
8893                 }
8894
8895                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8896                                     struct btrfs_file_extent_item);
8897                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8898                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8899                 btrfs_release_path(path);
8900                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8901                 if (cache) {
8902                         struct extent_record *tmp;
8903                         tmp = container_of(cache, struct extent_record, cache);
8904
8905                         /*
8906                          * If we found an extent record for the bytenr for this
8907                          * particular backref then we can't add it to our
8908                          * current extent record.  We only want to add backrefs
8909                          * that don't have a corresponding extent item in the
8910                          * extent tree since they likely belong to this record
8911                          * and we need to fix it if it doesn't match bytenrs.
8912                          */
8913                         if  (tmp->found_rec)
8914                                 continue;
8915                 }
8916
8917                 dback->found_ref += 1;
8918                 dback->disk_bytenr = bytenr;
8919                 dback->bytes = bytes;
8920
8921                 /*
8922                  * Set this so the verify backref code knows not to trust the
8923                  * values in this backref.
8924                  */
8925                 back->broken = 1;
8926         }
8927
8928         return 0;
8929 }
8930
8931 /*
8932  * Record orphan data ref into corresponding root.
8933  *
8934  * Return 0 if the extent item contains data ref and recorded.
8935  * Return 1 if the extent item contains no useful data ref
8936  *   On that case, it may contains only shared_dataref or metadata backref
8937  *   or the file extent exists(this should be handled by the extent bytenr
8938  *   recovery routine)
8939  * Return <0 if something goes wrong.
8940  */
8941 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8942                                       struct extent_record *rec)
8943 {
8944         struct btrfs_key key;
8945         struct btrfs_root *dest_root;
8946         struct extent_backref *back;
8947         struct data_backref *dback;
8948         struct orphan_data_extent *orphan;
8949         struct btrfs_path path;
8950         int recorded_data_ref = 0;
8951         int ret = 0;
8952
8953         if (rec->metadata)
8954                 return 1;
8955         btrfs_init_path(&path);
8956         list_for_each_entry(back, &rec->backrefs, list) {
8957                 if (back->full_backref || !back->is_data ||
8958                     !back->found_extent_tree)
8959                         continue;
8960                 dback = to_data_backref(back);
8961                 if (dback->found_ref)
8962                         continue;
8963                 key.objectid = dback->root;
8964                 key.type = BTRFS_ROOT_ITEM_KEY;
8965                 key.offset = (u64)-1;
8966
8967                 dest_root = btrfs_read_fs_root(fs_info, &key);
8968
8969                 /* For non-exist root we just skip it */
8970                 if (IS_ERR(dest_root) || !dest_root)
8971                         continue;
8972
8973                 key.objectid = dback->owner;
8974                 key.type = BTRFS_EXTENT_DATA_KEY;
8975                 key.offset = dback->offset;
8976
8977                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8978                 btrfs_release_path(&path);
8979                 /*
8980                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8981                  * we need to record it for inode/file extent rebuild.
8982                  * For ret > 0, we record it only for file extent rebuild.
8983                  * For ret == 0, the file extent exists but only bytenr
8984                  * mismatch, let the original bytenr fix routine to handle,
8985                  * don't record it.
8986                  */
8987                 if (ret == 0)
8988                         continue;
8989                 ret = 0;
8990                 orphan = malloc(sizeof(*orphan));
8991                 if (!orphan) {
8992                         ret = -ENOMEM;
8993                         goto out;
8994                 }
8995                 INIT_LIST_HEAD(&orphan->list);
8996                 orphan->root = dback->root;
8997                 orphan->objectid = dback->owner;
8998                 orphan->offset = dback->offset;
8999                 orphan->disk_bytenr = rec->cache.start;
9000                 orphan->disk_len = rec->cache.size;
9001                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9002                 recorded_data_ref = 1;
9003         }
9004 out:
9005         btrfs_release_path(&path);
9006         if (!ret)
9007                 return !recorded_data_ref;
9008         else
9009                 return ret;
9010 }
9011
9012 /*
9013  * when an incorrect extent item is found, this will delete
9014  * all of the existing entries for it and recreate them
9015  * based on what the tree scan found.
9016  */
9017 static int fixup_extent_refs(struct btrfs_fs_info *info,
9018                              struct cache_tree *extent_cache,
9019                              struct extent_record *rec)
9020 {
9021         struct btrfs_trans_handle *trans = NULL;
9022         int ret;
9023         struct btrfs_path path;
9024         struct list_head *cur = rec->backrefs.next;
9025         struct cache_extent *cache;
9026         struct extent_backref *back;
9027         int allocated = 0;
9028         u64 flags = 0;
9029
9030         if (rec->flag_block_full_backref)
9031                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9032
9033         btrfs_init_path(&path);
9034         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9035                 /*
9036                  * Sometimes the backrefs themselves are so broken they don't
9037                  * get attached to any meaningful rec, so first go back and
9038                  * check any of our backrefs that we couldn't find and throw
9039                  * them into the list if we find the backref so that
9040                  * verify_backrefs can figure out what to do.
9041                  */
9042                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9043                 if (ret < 0)
9044                         goto out;
9045         }
9046
9047         /* step one, make sure all of the backrefs agree */
9048         ret = verify_backrefs(info, &path, rec);
9049         if (ret < 0)
9050                 goto out;
9051
9052         trans = btrfs_start_transaction(info->extent_root, 1);
9053         if (IS_ERR(trans)) {
9054                 ret = PTR_ERR(trans);
9055                 goto out;
9056         }
9057
9058         /* step two, delete all the existing records */
9059         ret = delete_extent_records(trans, info->extent_root, &path,
9060                                     rec->start);
9061
9062         if (ret < 0)
9063                 goto out;
9064
9065         /* was this block corrupt?  If so, don't add references to it */
9066         cache = lookup_cache_extent(info->corrupt_blocks,
9067                                     rec->start, rec->max_size);
9068         if (cache) {
9069                 ret = 0;
9070                 goto out;
9071         }
9072
9073         /* step three, recreate all the refs we did find */
9074         while(cur != &rec->backrefs) {
9075                 back = to_extent_backref(cur);
9076                 cur = cur->next;
9077
9078                 /*
9079                  * if we didn't find any references, don't create a
9080                  * new extent record
9081                  */
9082                 if (!back->found_ref)
9083                         continue;
9084
9085                 rec->bad_full_backref = 0;
9086                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9087                 allocated = 1;
9088
9089                 if (ret)
9090                         goto out;
9091         }
9092 out:
9093         if (trans) {
9094                 int err = btrfs_commit_transaction(trans, info->extent_root);
9095                 if (!ret)
9096                         ret = err;
9097         }
9098
9099         if (!ret)
9100                 fprintf(stderr, "Repaired extent references for %llu\n",
9101                                 (unsigned long long)rec->start);
9102
9103         btrfs_release_path(&path);
9104         return ret;
9105 }
9106
9107 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9108                               struct extent_record *rec)
9109 {
9110         struct btrfs_trans_handle *trans;
9111         struct btrfs_root *root = fs_info->extent_root;
9112         struct btrfs_path path;
9113         struct btrfs_extent_item *ei;
9114         struct btrfs_key key;
9115         u64 flags;
9116         int ret = 0;
9117
9118         key.objectid = rec->start;
9119         if (rec->metadata) {
9120                 key.type = BTRFS_METADATA_ITEM_KEY;
9121                 key.offset = rec->info_level;
9122         } else {
9123                 key.type = BTRFS_EXTENT_ITEM_KEY;
9124                 key.offset = rec->max_size;
9125         }
9126
9127         trans = btrfs_start_transaction(root, 0);
9128         if (IS_ERR(trans))
9129                 return PTR_ERR(trans);
9130
9131         btrfs_init_path(&path);
9132         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9133         if (ret < 0) {
9134                 btrfs_release_path(&path);
9135                 btrfs_commit_transaction(trans, root);
9136                 return ret;
9137         } else if (ret) {
9138                 fprintf(stderr, "Didn't find extent for %llu\n",
9139                         (unsigned long long)rec->start);
9140                 btrfs_release_path(&path);
9141                 btrfs_commit_transaction(trans, root);
9142                 return -ENOENT;
9143         }
9144
9145         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9146                             struct btrfs_extent_item);
9147         flags = btrfs_extent_flags(path.nodes[0], ei);
9148         if (rec->flag_block_full_backref) {
9149                 fprintf(stderr, "setting full backref on %llu\n",
9150                         (unsigned long long)key.objectid);
9151                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9152         } else {
9153                 fprintf(stderr, "clearing full backref on %llu\n",
9154                         (unsigned long long)key.objectid);
9155                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9156         }
9157         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9158         btrfs_mark_buffer_dirty(path.nodes[0]);
9159         btrfs_release_path(&path);
9160         ret = btrfs_commit_transaction(trans, root);
9161         if (!ret)
9162                 fprintf(stderr, "Repaired extent flags for %llu\n",
9163                                 (unsigned long long)rec->start);
9164
9165         return ret;
9166 }
9167
9168 /* right now we only prune from the extent allocation tree */
9169 static int prune_one_block(struct btrfs_trans_handle *trans,
9170                            struct btrfs_fs_info *info,
9171                            struct btrfs_corrupt_block *corrupt)
9172 {
9173         int ret;
9174         struct btrfs_path path;
9175         struct extent_buffer *eb;
9176         u64 found;
9177         int slot;
9178         int nritems;
9179         int level = corrupt->level + 1;
9180
9181         btrfs_init_path(&path);
9182 again:
9183         /* we want to stop at the parent to our busted block */
9184         path.lowest_level = level;
9185
9186         ret = btrfs_search_slot(trans, info->extent_root,
9187                                 &corrupt->key, &path, -1, 1);
9188
9189         if (ret < 0)
9190                 goto out;
9191
9192         eb = path.nodes[level];
9193         if (!eb) {
9194                 ret = -ENOENT;
9195                 goto out;
9196         }
9197
9198         /*
9199          * hopefully the search gave us the block we want to prune,
9200          * lets try that first
9201          */
9202         slot = path.slots[level];
9203         found =  btrfs_node_blockptr(eb, slot);
9204         if (found == corrupt->cache.start)
9205                 goto del_ptr;
9206
9207         nritems = btrfs_header_nritems(eb);
9208
9209         /* the search failed, lets scan this node and hope we find it */
9210         for (slot = 0; slot < nritems; slot++) {
9211                 found =  btrfs_node_blockptr(eb, slot);
9212                 if (found == corrupt->cache.start)
9213                         goto del_ptr;
9214         }
9215         /*
9216          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9217          * to this block
9218          */
9219         if (eb == info->extent_root->node) {
9220                 ret = -ENOENT;
9221                 goto out;
9222         } else {
9223                 level++;
9224                 btrfs_release_path(&path);
9225                 goto again;
9226         }
9227
9228 del_ptr:
9229         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9230         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9231
9232 out:
9233         btrfs_release_path(&path);
9234         return ret;
9235 }
9236
9237 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9238 {
9239         struct btrfs_trans_handle *trans = NULL;
9240         struct cache_extent *cache;
9241         struct btrfs_corrupt_block *corrupt;
9242
9243         while (1) {
9244                 cache = search_cache_extent(info->corrupt_blocks, 0);
9245                 if (!cache)
9246                         break;
9247                 if (!trans) {
9248                         trans = btrfs_start_transaction(info->extent_root, 1);
9249                         if (IS_ERR(trans))
9250                                 return PTR_ERR(trans);
9251                 }
9252                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9253                 prune_one_block(trans, info, corrupt);
9254                 remove_cache_extent(info->corrupt_blocks, cache);
9255         }
9256         if (trans)
9257                 return btrfs_commit_transaction(trans, info->extent_root);
9258         return 0;
9259 }
9260
9261 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9262 {
9263         struct btrfs_block_group_cache *cache;
9264         u64 start, end;
9265         int ret;
9266
9267         while (1) {
9268                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9269                                             &start, &end, EXTENT_DIRTY);
9270                 if (ret)
9271                         break;
9272                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9273         }
9274
9275         start = 0;
9276         while (1) {
9277                 cache = btrfs_lookup_first_block_group(fs_info, start);
9278                 if (!cache)
9279                         break;
9280                 if (cache->cached)
9281                         cache->cached = 0;
9282                 start = cache->key.objectid + cache->key.offset;
9283         }
9284 }
9285
9286 static int check_extent_refs(struct btrfs_root *root,
9287                              struct cache_tree *extent_cache)
9288 {
9289         struct extent_record *rec;
9290         struct cache_extent *cache;
9291         int ret = 0;
9292         int had_dups = 0;
9293
9294         if (repair) {
9295                 /*
9296                  * if we're doing a repair, we have to make sure
9297                  * we don't allocate from the problem extents.
9298                  * In the worst case, this will be all the
9299                  * extents in the FS
9300                  */
9301                 cache = search_cache_extent(extent_cache, 0);
9302                 while(cache) {
9303                         rec = container_of(cache, struct extent_record, cache);
9304                         set_extent_dirty(root->fs_info->excluded_extents,
9305                                          rec->start,
9306                                          rec->start + rec->max_size - 1);
9307                         cache = next_cache_extent(cache);
9308                 }
9309
9310                 /* pin down all the corrupted blocks too */
9311                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9312                 while(cache) {
9313                         set_extent_dirty(root->fs_info->excluded_extents,
9314                                          cache->start,
9315                                          cache->start + cache->size - 1);
9316                         cache = next_cache_extent(cache);
9317                 }
9318                 prune_corrupt_blocks(root->fs_info);
9319                 reset_cached_block_groups(root->fs_info);
9320         }
9321
9322         reset_cached_block_groups(root->fs_info);
9323
9324         /*
9325          * We need to delete any duplicate entries we find first otherwise we
9326          * could mess up the extent tree when we have backrefs that actually
9327          * belong to a different extent item and not the weird duplicate one.
9328          */
9329         while (repair && !list_empty(&duplicate_extents)) {
9330                 rec = to_extent_record(duplicate_extents.next);
9331                 list_del_init(&rec->list);
9332
9333                 /* Sometimes we can find a backref before we find an actual
9334                  * extent, so we need to process it a little bit to see if there
9335                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9336                  * if this is a backref screwup.  If we need to delete stuff
9337                  * process_duplicates() will return 0, otherwise it will return
9338                  * 1 and we
9339                  */
9340                 if (process_duplicates(extent_cache, rec))
9341                         continue;
9342                 ret = delete_duplicate_records(root, rec);
9343                 if (ret < 0)
9344                         return ret;
9345                 /*
9346                  * delete_duplicate_records will return the number of entries
9347                  * deleted, so if it's greater than 0 then we know we actually
9348                  * did something and we need to remove.
9349                  */
9350                 if (ret)
9351                         had_dups = 1;
9352         }
9353
9354         if (had_dups)
9355                 return -EAGAIN;
9356
9357         while(1) {
9358                 int cur_err = 0;
9359                 int fix = 0;
9360
9361                 cache = search_cache_extent(extent_cache, 0);
9362                 if (!cache)
9363                         break;
9364                 rec = container_of(cache, struct extent_record, cache);
9365                 if (rec->num_duplicates) {
9366                         fprintf(stderr, "extent item %llu has multiple extent "
9367                                 "items\n", (unsigned long long)rec->start);
9368                         cur_err = 1;
9369                 }
9370
9371                 if (rec->refs != rec->extent_item_refs) {
9372                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9373                                 (unsigned long long)rec->start,
9374                                 (unsigned long long)rec->nr);
9375                         fprintf(stderr, "extent item %llu, found %llu\n",
9376                                 (unsigned long long)rec->extent_item_refs,
9377                                 (unsigned long long)rec->refs);
9378                         ret = record_orphan_data_extents(root->fs_info, rec);
9379                         if (ret < 0)
9380                                 goto repair_abort;
9381                         fix = ret;
9382                         cur_err = 1;
9383                 }
9384                 if (all_backpointers_checked(rec, 1)) {
9385                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9386                                 (unsigned long long)rec->start,
9387                                 (unsigned long long)rec->nr);
9388                         fix = 1;
9389                         cur_err = 1;
9390                 }
9391                 if (!rec->owner_ref_checked) {
9392                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9393                                 (unsigned long long)rec->start,
9394                                 (unsigned long long)rec->nr);
9395                         fix = 1;
9396                         cur_err = 1;
9397                 }
9398
9399                 if (repair && fix) {
9400                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9401                         if (ret)
9402                                 goto repair_abort;
9403                 }
9404
9405
9406                 if (rec->bad_full_backref) {
9407                         fprintf(stderr, "bad full backref, on [%llu]\n",
9408                                 (unsigned long long)rec->start);
9409                         if (repair) {
9410                                 ret = fixup_extent_flags(root->fs_info, rec);
9411                                 if (ret)
9412                                         goto repair_abort;
9413                                 fix = 1;
9414                         }
9415                         cur_err = 1;
9416                 }
9417                 /*
9418                  * Although it's not a extent ref's problem, we reuse this
9419                  * routine for error reporting.
9420                  * No repair function yet.
9421                  */
9422                 if (rec->crossing_stripes) {
9423                         fprintf(stderr,
9424                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9425                                 rec->start, rec->start + rec->max_size);
9426                         cur_err = 1;
9427                 }
9428
9429                 if (rec->wrong_chunk_type) {
9430                         fprintf(stderr,
9431                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9432                                 rec->start, rec->start + rec->max_size);
9433                         cur_err = 1;
9434                 }
9435
9436                 remove_cache_extent(extent_cache, cache);
9437                 free_all_extent_backrefs(rec);
9438                 if (!init_extent_tree && repair && (!cur_err || fix))
9439                         clear_extent_dirty(root->fs_info->excluded_extents,
9440                                            rec->start,
9441                                            rec->start + rec->max_size - 1);
9442                 free(rec);
9443         }
9444 repair_abort:
9445         if (repair) {
9446                 if (ret && ret != -EAGAIN) {
9447                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9448                         exit(1);
9449                 } else if (!ret) {
9450                         struct btrfs_trans_handle *trans;
9451
9452                         root = root->fs_info->extent_root;
9453                         trans = btrfs_start_transaction(root, 1);
9454                         if (IS_ERR(trans)) {
9455                                 ret = PTR_ERR(trans);
9456                                 goto repair_abort;
9457                         }
9458
9459                         btrfs_fix_block_accounting(trans, root);
9460                         ret = btrfs_commit_transaction(trans, root);
9461                         if (ret)
9462                                 goto repair_abort;
9463                 }
9464                 return ret;
9465         }
9466         return 0;
9467 }
9468
9469 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9470 {
9471         u64 stripe_size;
9472
9473         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9474                 stripe_size = length;
9475                 stripe_size /= num_stripes;
9476         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9477                 stripe_size = length * 2;
9478                 stripe_size /= num_stripes;
9479         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9480                 stripe_size = length;
9481                 stripe_size /= (num_stripes - 1);
9482         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9483                 stripe_size = length;
9484                 stripe_size /= (num_stripes - 2);
9485         } else {
9486                 stripe_size = length;
9487         }
9488         return stripe_size;
9489 }
9490
9491 /*
9492  * Check the chunk with its block group/dev list ref:
9493  * Return 0 if all refs seems valid.
9494  * Return 1 if part of refs seems valid, need later check for rebuild ref
9495  * like missing block group and needs to search extent tree to rebuild them.
9496  * Return -1 if essential refs are missing and unable to rebuild.
9497  */
9498 static int check_chunk_refs(struct chunk_record *chunk_rec,
9499                             struct block_group_tree *block_group_cache,
9500                             struct device_extent_tree *dev_extent_cache,
9501                             int silent)
9502 {
9503         struct cache_extent *block_group_item;
9504         struct block_group_record *block_group_rec;
9505         struct cache_extent *dev_extent_item;
9506         struct device_extent_record *dev_extent_rec;
9507         u64 devid;
9508         u64 offset;
9509         u64 length;
9510         int metadump_v2 = 0;
9511         int i;
9512         int ret = 0;
9513
9514         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9515                                                chunk_rec->offset,
9516                                                chunk_rec->length);
9517         if (block_group_item) {
9518                 block_group_rec = container_of(block_group_item,
9519                                                struct block_group_record,
9520                                                cache);
9521                 if (chunk_rec->length != block_group_rec->offset ||
9522                     chunk_rec->offset != block_group_rec->objectid ||
9523                     (!metadump_v2 &&
9524                      chunk_rec->type_flags != block_group_rec->flags)) {
9525                         if (!silent)
9526                                 fprintf(stderr,
9527                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9528                                         chunk_rec->objectid,
9529                                         chunk_rec->type,
9530                                         chunk_rec->offset,
9531                                         chunk_rec->length,
9532                                         chunk_rec->offset,
9533                                         chunk_rec->type_flags,
9534                                         block_group_rec->objectid,
9535                                         block_group_rec->type,
9536                                         block_group_rec->offset,
9537                                         block_group_rec->offset,
9538                                         block_group_rec->objectid,
9539                                         block_group_rec->flags);
9540                         ret = -1;
9541                 } else {
9542                         list_del_init(&block_group_rec->list);
9543                         chunk_rec->bg_rec = block_group_rec;
9544                 }
9545         } else {
9546                 if (!silent)
9547                         fprintf(stderr,
9548                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9549                                 chunk_rec->objectid,
9550                                 chunk_rec->type,
9551                                 chunk_rec->offset,
9552                                 chunk_rec->length,
9553                                 chunk_rec->offset,
9554                                 chunk_rec->type_flags);
9555                 ret = 1;
9556         }
9557
9558         if (metadump_v2)
9559                 return ret;
9560
9561         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9562                                     chunk_rec->num_stripes);
9563         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9564                 devid = chunk_rec->stripes[i].devid;
9565                 offset = chunk_rec->stripes[i].offset;
9566                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9567                                                        devid, offset, length);
9568                 if (dev_extent_item) {
9569                         dev_extent_rec = container_of(dev_extent_item,
9570                                                 struct device_extent_record,
9571                                                 cache);
9572                         if (dev_extent_rec->objectid != devid ||
9573                             dev_extent_rec->offset != offset ||
9574                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9575                             dev_extent_rec->length != length) {
9576                                 if (!silent)
9577                                         fprintf(stderr,
9578                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9579                                                 chunk_rec->objectid,
9580                                                 chunk_rec->type,
9581                                                 chunk_rec->offset,
9582                                                 chunk_rec->stripes[i].devid,
9583                                                 chunk_rec->stripes[i].offset,
9584                                                 dev_extent_rec->objectid,
9585                                                 dev_extent_rec->offset,
9586                                                 dev_extent_rec->length);
9587                                 ret = -1;
9588                         } else {
9589                                 list_move(&dev_extent_rec->chunk_list,
9590                                           &chunk_rec->dextents);
9591                         }
9592                 } else {
9593                         if (!silent)
9594                                 fprintf(stderr,
9595                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9596                                         chunk_rec->objectid,
9597                                         chunk_rec->type,
9598                                         chunk_rec->offset,
9599                                         chunk_rec->stripes[i].devid,
9600                                         chunk_rec->stripes[i].offset);
9601                         ret = -1;
9602                 }
9603         }
9604         return ret;
9605 }
9606
9607 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9608 int check_chunks(struct cache_tree *chunk_cache,
9609                  struct block_group_tree *block_group_cache,
9610                  struct device_extent_tree *dev_extent_cache,
9611                  struct list_head *good, struct list_head *bad,
9612                  struct list_head *rebuild, int silent)
9613 {
9614         struct cache_extent *chunk_item;
9615         struct chunk_record *chunk_rec;
9616         struct block_group_record *bg_rec;
9617         struct device_extent_record *dext_rec;
9618         int err;
9619         int ret = 0;
9620
9621         chunk_item = first_cache_extent(chunk_cache);
9622         while (chunk_item) {
9623                 chunk_rec = container_of(chunk_item, struct chunk_record,
9624                                          cache);
9625                 err = check_chunk_refs(chunk_rec, block_group_cache,
9626                                        dev_extent_cache, silent);
9627                 if (err < 0)
9628                         ret = err;
9629                 if (err == 0 && good)
9630                         list_add_tail(&chunk_rec->list, good);
9631                 if (err > 0 && rebuild)
9632                         list_add_tail(&chunk_rec->list, rebuild);
9633                 if (err < 0 && bad)
9634                         list_add_tail(&chunk_rec->list, bad);
9635                 chunk_item = next_cache_extent(chunk_item);
9636         }
9637
9638         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9639                 if (!silent)
9640                         fprintf(stderr,
9641                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9642                                 bg_rec->objectid,
9643                                 bg_rec->offset,
9644                                 bg_rec->flags);
9645                 if (!ret)
9646                         ret = 1;
9647         }
9648
9649         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9650                             chunk_list) {
9651                 if (!silent)
9652                         fprintf(stderr,
9653                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9654                                 dext_rec->objectid,
9655                                 dext_rec->offset,
9656                                 dext_rec->length);
9657                 if (!ret)
9658                         ret = 1;
9659         }
9660         return ret;
9661 }
9662
9663
9664 static int check_device_used(struct device_record *dev_rec,
9665                              struct device_extent_tree *dext_cache)
9666 {
9667         struct cache_extent *cache;
9668         struct device_extent_record *dev_extent_rec;
9669         u64 total_byte = 0;
9670
9671         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9672         while (cache) {
9673                 dev_extent_rec = container_of(cache,
9674                                               struct device_extent_record,
9675                                               cache);
9676                 if (dev_extent_rec->objectid != dev_rec->devid)
9677                         break;
9678
9679                 list_del_init(&dev_extent_rec->device_list);
9680                 total_byte += dev_extent_rec->length;
9681                 cache = next_cache_extent(cache);
9682         }
9683
9684         if (total_byte != dev_rec->byte_used) {
9685                 fprintf(stderr,
9686                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9687                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9688                         dev_rec->type, dev_rec->offset);
9689                 return -1;
9690         } else {
9691                 return 0;
9692         }
9693 }
9694
9695 /* check btrfs_dev_item -> btrfs_dev_extent */
9696 static int check_devices(struct rb_root *dev_cache,
9697                          struct device_extent_tree *dev_extent_cache)
9698 {
9699         struct rb_node *dev_node;
9700         struct device_record *dev_rec;
9701         struct device_extent_record *dext_rec;
9702         int err;
9703         int ret = 0;
9704
9705         dev_node = rb_first(dev_cache);
9706         while (dev_node) {
9707                 dev_rec = container_of(dev_node, struct device_record, node);
9708                 err = check_device_used(dev_rec, dev_extent_cache);
9709                 if (err)
9710                         ret = err;
9711
9712                 dev_node = rb_next(dev_node);
9713         }
9714         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9715                             device_list) {
9716                 fprintf(stderr,
9717                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9718                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9719                 if (!ret)
9720                         ret = 1;
9721         }
9722         return ret;
9723 }
9724
9725 static int add_root_item_to_list(struct list_head *head,
9726                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9727                                   u8 level, u8 drop_level,
9728                                   int level_size, struct btrfs_key *drop_key)
9729 {
9730
9731         struct root_item_record *ri_rec;
9732         ri_rec = malloc(sizeof(*ri_rec));
9733         if (!ri_rec)
9734                 return -ENOMEM;
9735         ri_rec->bytenr = bytenr;
9736         ri_rec->objectid = objectid;
9737         ri_rec->level = level;
9738         ri_rec->level_size = level_size;
9739         ri_rec->drop_level = drop_level;
9740         ri_rec->last_snapshot = last_snapshot;
9741         if (drop_key)
9742                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9743         list_add_tail(&ri_rec->list, head);
9744
9745         return 0;
9746 }
9747
9748 static void free_root_item_list(struct list_head *list)
9749 {
9750         struct root_item_record *ri_rec;
9751
9752         while (!list_empty(list)) {
9753                 ri_rec = list_first_entry(list, struct root_item_record,
9754                                           list);
9755                 list_del_init(&ri_rec->list);
9756                 free(ri_rec);
9757         }
9758 }
9759
9760 static int deal_root_from_list(struct list_head *list,
9761                                struct btrfs_root *root,
9762                                struct block_info *bits,
9763                                int bits_nr,
9764                                struct cache_tree *pending,
9765                                struct cache_tree *seen,
9766                                struct cache_tree *reada,
9767                                struct cache_tree *nodes,
9768                                struct cache_tree *extent_cache,
9769                                struct cache_tree *chunk_cache,
9770                                struct rb_root *dev_cache,
9771                                struct block_group_tree *block_group_cache,
9772                                struct device_extent_tree *dev_extent_cache)
9773 {
9774         int ret = 0;
9775         u64 last;
9776
9777         while (!list_empty(list)) {
9778                 struct root_item_record *rec;
9779                 struct extent_buffer *buf;
9780                 rec = list_entry(list->next,
9781                                  struct root_item_record, list);
9782                 last = 0;
9783                 buf = read_tree_block(root->fs_info,
9784                                       rec->bytenr, rec->level_size, 0);
9785                 if (!extent_buffer_uptodate(buf)) {
9786                         free_extent_buffer(buf);
9787                         ret = -EIO;
9788                         break;
9789                 }
9790                 ret = add_root_to_pending(buf, extent_cache, pending,
9791                                     seen, nodes, rec->objectid);
9792                 if (ret < 0)
9793                         break;
9794                 /*
9795                  * To rebuild extent tree, we need deal with snapshot
9796                  * one by one, otherwise we deal with node firstly which
9797                  * can maximize readahead.
9798                  */
9799                 while (1) {
9800                         ret = run_next_block(root, bits, bits_nr, &last,
9801                                              pending, seen, reada, nodes,
9802                                              extent_cache, chunk_cache,
9803                                              dev_cache, block_group_cache,
9804                                              dev_extent_cache, rec);
9805                         if (ret != 0)
9806                                 break;
9807                 }
9808                 free_extent_buffer(buf);
9809                 list_del(&rec->list);
9810                 free(rec);
9811                 if (ret < 0)
9812                         break;
9813         }
9814         while (ret >= 0) {
9815                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9816                                      reada, nodes, extent_cache, chunk_cache,
9817                                      dev_cache, block_group_cache,
9818                                      dev_extent_cache, NULL);
9819                 if (ret != 0) {
9820                         if (ret > 0)
9821                                 ret = 0;
9822                         break;
9823                 }
9824         }
9825         return ret;
9826 }
9827
9828 static int check_chunks_and_extents(struct btrfs_root *root)
9829 {
9830         struct rb_root dev_cache;
9831         struct cache_tree chunk_cache;
9832         struct block_group_tree block_group_cache;
9833         struct device_extent_tree dev_extent_cache;
9834         struct cache_tree extent_cache;
9835         struct cache_tree seen;
9836         struct cache_tree pending;
9837         struct cache_tree reada;
9838         struct cache_tree nodes;
9839         struct extent_io_tree excluded_extents;
9840         struct cache_tree corrupt_blocks;
9841         struct btrfs_path path;
9842         struct btrfs_key key;
9843         struct btrfs_key found_key;
9844         int ret, err = 0;
9845         struct block_info *bits;
9846         int bits_nr;
9847         struct extent_buffer *leaf;
9848         int slot;
9849         struct btrfs_root_item ri;
9850         struct list_head dropping_trees;
9851         struct list_head normal_trees;
9852         struct btrfs_root *root1;
9853         u64 objectid;
9854         u32 level_size;
9855         u8 level;
9856
9857         dev_cache = RB_ROOT;
9858         cache_tree_init(&chunk_cache);
9859         block_group_tree_init(&block_group_cache);
9860         device_extent_tree_init(&dev_extent_cache);
9861
9862         cache_tree_init(&extent_cache);
9863         cache_tree_init(&seen);
9864         cache_tree_init(&pending);
9865         cache_tree_init(&nodes);
9866         cache_tree_init(&reada);
9867         cache_tree_init(&corrupt_blocks);
9868         extent_io_tree_init(&excluded_extents);
9869         INIT_LIST_HEAD(&dropping_trees);
9870         INIT_LIST_HEAD(&normal_trees);
9871
9872         if (repair) {
9873                 root->fs_info->excluded_extents = &excluded_extents;
9874                 root->fs_info->fsck_extent_cache = &extent_cache;
9875                 root->fs_info->free_extent_hook = free_extent_hook;
9876                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9877         }
9878
9879         bits_nr = 1024;
9880         bits = malloc(bits_nr * sizeof(struct block_info));
9881         if (!bits) {
9882                 perror("malloc");
9883                 exit(1);
9884         }
9885
9886         if (ctx.progress_enabled) {
9887                 ctx.tp = TASK_EXTENTS;
9888                 task_start(ctx.info);
9889         }
9890
9891 again:
9892         root1 = root->fs_info->tree_root;
9893         level = btrfs_header_level(root1->node);
9894         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9895                                     root1->node->start, 0, level, 0,
9896                                     root1->fs_info->nodesize, NULL);
9897         if (ret < 0)
9898                 goto out;
9899         root1 = root->fs_info->chunk_root;
9900         level = btrfs_header_level(root1->node);
9901         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9902                                     root1->node->start, 0, level, 0,
9903                                     root1->fs_info->nodesize, NULL);
9904         if (ret < 0)
9905                 goto out;
9906         btrfs_init_path(&path);
9907         key.offset = 0;
9908         key.objectid = 0;
9909         key.type = BTRFS_ROOT_ITEM_KEY;
9910         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9911                                         &key, &path, 0, 0);
9912         if (ret < 0)
9913                 goto out;
9914         while(1) {
9915                 leaf = path.nodes[0];
9916                 slot = path.slots[0];
9917                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9918                         ret = btrfs_next_leaf(root, &path);
9919                         if (ret != 0)
9920                                 break;
9921                         leaf = path.nodes[0];
9922                         slot = path.slots[0];
9923                 }
9924                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9925                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9926                         unsigned long offset;
9927                         u64 last_snapshot;
9928
9929                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9930                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9931                         last_snapshot = btrfs_root_last_snapshot(&ri);
9932                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9933                                 level = btrfs_root_level(&ri);
9934                                 level_size = root->fs_info->nodesize;
9935                                 ret = add_root_item_to_list(&normal_trees,
9936                                                 found_key.objectid,
9937                                                 btrfs_root_bytenr(&ri),
9938                                                 last_snapshot, level,
9939                                                 0, level_size, NULL);
9940                                 if (ret < 0)
9941                                         goto out;
9942                         } else {
9943                                 level = btrfs_root_level(&ri);
9944                                 level_size = root->fs_info->nodesize;
9945                                 objectid = found_key.objectid;
9946                                 btrfs_disk_key_to_cpu(&found_key,
9947                                                       &ri.drop_progress);
9948                                 ret = add_root_item_to_list(&dropping_trees,
9949                                                 objectid,
9950                                                 btrfs_root_bytenr(&ri),
9951                                                 last_snapshot, level,
9952                                                 ri.drop_level,
9953                                                 level_size, &found_key);
9954                                 if (ret < 0)
9955                                         goto out;
9956                         }
9957                 }
9958                 path.slots[0]++;
9959         }
9960         btrfs_release_path(&path);
9961
9962         /*
9963          * check_block can return -EAGAIN if it fixes something, please keep
9964          * this in mind when dealing with return values from these functions, if
9965          * we get -EAGAIN we want to fall through and restart the loop.
9966          */
9967         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9968                                   &seen, &reada, &nodes, &extent_cache,
9969                                   &chunk_cache, &dev_cache, &block_group_cache,
9970                                   &dev_extent_cache);
9971         if (ret < 0) {
9972                 if (ret == -EAGAIN)
9973                         goto loop;
9974                 goto out;
9975         }
9976         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9977                                   &pending, &seen, &reada, &nodes,
9978                                   &extent_cache, &chunk_cache, &dev_cache,
9979                                   &block_group_cache, &dev_extent_cache);
9980         if (ret < 0) {
9981                 if (ret == -EAGAIN)
9982                         goto loop;
9983                 goto out;
9984         }
9985
9986         ret = check_chunks(&chunk_cache, &block_group_cache,
9987                            &dev_extent_cache, NULL, NULL, NULL, 0);
9988         if (ret) {
9989                 if (ret == -EAGAIN)
9990                         goto loop;
9991                 err = ret;
9992         }
9993
9994         ret = check_extent_refs(root, &extent_cache);
9995         if (ret < 0) {
9996                 if (ret == -EAGAIN)
9997                         goto loop;
9998                 goto out;
9999         }
10000
10001         ret = check_devices(&dev_cache, &dev_extent_cache);
10002         if (ret && err)
10003                 ret = err;
10004
10005 out:
10006         task_stop(ctx.info);
10007         if (repair) {
10008                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10009                 extent_io_tree_cleanup(&excluded_extents);
10010                 root->fs_info->fsck_extent_cache = NULL;
10011                 root->fs_info->free_extent_hook = NULL;
10012                 root->fs_info->corrupt_blocks = NULL;
10013                 root->fs_info->excluded_extents = NULL;
10014         }
10015         free(bits);
10016         free_chunk_cache_tree(&chunk_cache);
10017         free_device_cache_tree(&dev_cache);
10018         free_block_group_tree(&block_group_cache);
10019         free_device_extent_tree(&dev_extent_cache);
10020         free_extent_cache_tree(&seen);
10021         free_extent_cache_tree(&pending);
10022         free_extent_cache_tree(&reada);
10023         free_extent_cache_tree(&nodes);
10024         free_root_item_list(&normal_trees);
10025         free_root_item_list(&dropping_trees);
10026         return ret;
10027 loop:
10028         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10029         free_extent_cache_tree(&seen);
10030         free_extent_cache_tree(&pending);
10031         free_extent_cache_tree(&reada);
10032         free_extent_cache_tree(&nodes);
10033         free_chunk_cache_tree(&chunk_cache);
10034         free_block_group_tree(&block_group_cache);
10035         free_device_cache_tree(&dev_cache);
10036         free_device_extent_tree(&dev_extent_cache);
10037         free_extent_record_cache(&extent_cache);
10038         free_root_item_list(&normal_trees);
10039         free_root_item_list(&dropping_trees);
10040         extent_io_tree_cleanup(&excluded_extents);
10041         goto again;
10042 }
10043
10044 /*
10045  * Check backrefs of a tree block given by @bytenr or @eb.
10046  *
10047  * @root:       the root containing the @bytenr or @eb
10048  * @eb:         tree block extent buffer, can be NULL
10049  * @bytenr:     bytenr of the tree block to search
10050  * @level:      tree level of the tree block
10051  * @owner:      owner of the tree block
10052  *
10053  * Return >0 for any error found and output error message
10054  * Return 0 for no error found
10055  */
10056 static int check_tree_block_ref(struct btrfs_root *root,
10057                                 struct extent_buffer *eb, u64 bytenr,
10058                                 int level, u64 owner)
10059 {
10060         struct btrfs_key key;
10061         struct btrfs_root *extent_root = root->fs_info->extent_root;
10062         struct btrfs_path path;
10063         struct btrfs_extent_item *ei;
10064         struct btrfs_extent_inline_ref *iref;
10065         struct extent_buffer *leaf;
10066         unsigned long end;
10067         unsigned long ptr;
10068         int slot;
10069         int skinny_level;
10070         int type;
10071         u32 nodesize = root->fs_info->nodesize;
10072         u32 item_size;
10073         u64 offset;
10074         int tree_reloc_root = 0;
10075         int found_ref = 0;
10076         int err = 0;
10077         int ret;
10078
10079         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10080             btrfs_header_bytenr(root->node) == bytenr)
10081                 tree_reloc_root = 1;
10082
10083         btrfs_init_path(&path);
10084         key.objectid = bytenr;
10085         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10086                 key.type = BTRFS_METADATA_ITEM_KEY;
10087         else
10088                 key.type = BTRFS_EXTENT_ITEM_KEY;
10089         key.offset = (u64)-1;
10090
10091         /* Search for the backref in extent tree */
10092         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10093         if (ret < 0) {
10094                 err |= BACKREF_MISSING;
10095                 goto out;
10096         }
10097         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10098         if (ret) {
10099                 err |= BACKREF_MISSING;
10100                 goto out;
10101         }
10102
10103         leaf = path.nodes[0];
10104         slot = path.slots[0];
10105         btrfs_item_key_to_cpu(leaf, &key, slot);
10106
10107         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10108
10109         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10110                 skinny_level = (int)key.offset;
10111                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10112         } else {
10113                 struct btrfs_tree_block_info *info;
10114
10115                 info = (struct btrfs_tree_block_info *)(ei + 1);
10116                 skinny_level = btrfs_tree_block_level(leaf, info);
10117                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10118         }
10119
10120         if (eb) {
10121                 u64 header_gen;
10122                 u64 extent_gen;
10123
10124                 if (!(btrfs_extent_flags(leaf, ei) &
10125                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10126                         error(
10127                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10128                                 key.objectid, nodesize,
10129                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10130                         err = BACKREF_MISMATCH;
10131                 }
10132                 header_gen = btrfs_header_generation(eb);
10133                 extent_gen = btrfs_extent_generation(leaf, ei);
10134                 if (header_gen != extent_gen) {
10135                         error(
10136         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10137                                 key.objectid, nodesize, header_gen,
10138                                 extent_gen);
10139                         err = BACKREF_MISMATCH;
10140                 }
10141                 if (level != skinny_level) {
10142                         error(
10143                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10144                                 key.objectid, nodesize, level, skinny_level);
10145                         err = BACKREF_MISMATCH;
10146                 }
10147                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10148                         error(
10149                         "extent[%llu %u] is referred by other roots than %llu",
10150                                 key.objectid, nodesize, root->objectid);
10151                         err = BACKREF_MISMATCH;
10152                 }
10153         }
10154
10155         /*
10156          * Iterate the extent/metadata item to find the exact backref
10157          */
10158         item_size = btrfs_item_size_nr(leaf, slot);
10159         ptr = (unsigned long)iref;
10160         end = (unsigned long)ei + item_size;
10161         while (ptr < end) {
10162                 iref = (struct btrfs_extent_inline_ref *)ptr;
10163                 type = btrfs_extent_inline_ref_type(leaf, iref);
10164                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10165
10166                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10167                         (offset == root->objectid || offset == owner)) {
10168                         found_ref = 1;
10169                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10170                         /*
10171                          * Backref of tree reloc root points to itself, no need
10172                          * to check backref any more.
10173                          */
10174                         if (tree_reloc_root)
10175                                 found_ref = 1;
10176                         else
10177                         /* Check if the backref points to valid referencer */
10178                                 found_ref = !check_tree_block_ref(root, NULL,
10179                                                 offset, level + 1, owner);
10180                 }
10181
10182                 if (found_ref)
10183                         break;
10184                 ptr += btrfs_extent_inline_ref_size(type);
10185         }
10186
10187         /*
10188          * Inlined extent item doesn't have what we need, check
10189          * TREE_BLOCK_REF_KEY
10190          */
10191         if (!found_ref) {
10192                 btrfs_release_path(&path);
10193                 key.objectid = bytenr;
10194                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10195                 key.offset = root->objectid;
10196
10197                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10198                 if (!ret)
10199                         found_ref = 1;
10200         }
10201         if (!found_ref)
10202                 err |= BACKREF_MISSING;
10203 out:
10204         btrfs_release_path(&path);
10205         if (eb && (err & BACKREF_MISSING))
10206                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10207                         bytenr, nodesize, owner, level);
10208         return err;
10209 }
10210
10211 /*
10212  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10213  *
10214  * Return >0 any error found and output error message
10215  * Return 0 for no error found
10216  */
10217 static int check_extent_data_item(struct btrfs_root *root,
10218                                   struct extent_buffer *eb, int slot)
10219 {
10220         struct btrfs_file_extent_item *fi;
10221         struct btrfs_path path;
10222         struct btrfs_root *extent_root = root->fs_info->extent_root;
10223         struct btrfs_key fi_key;
10224         struct btrfs_key dbref_key;
10225         struct extent_buffer *leaf;
10226         struct btrfs_extent_item *ei;
10227         struct btrfs_extent_inline_ref *iref;
10228         struct btrfs_extent_data_ref *dref;
10229         u64 owner;
10230         u64 disk_bytenr;
10231         u64 disk_num_bytes;
10232         u64 extent_num_bytes;
10233         u64 extent_flags;
10234         u32 item_size;
10235         unsigned long end;
10236         unsigned long ptr;
10237         int type;
10238         u64 ref_root;
10239         int found_dbackref = 0;
10240         int err = 0;
10241         int ret;
10242
10243         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10244         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10245
10246         /* Nothing to check for hole and inline data extents */
10247         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10248             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10249                 return 0;
10250
10251         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10252         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10253         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10254
10255         /* Check unaligned disk_num_bytes and num_bytes */
10256         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10257                 error(
10258 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10259                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10260                         root->fs_info->sectorsize);
10261                 err |= BYTES_UNALIGNED;
10262         } else {
10263                 data_bytes_allocated += disk_num_bytes;
10264         }
10265         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10266                 error(
10267 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10268                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10269                         root->fs_info->sectorsize);
10270                 err |= BYTES_UNALIGNED;
10271         } else {
10272                 data_bytes_referenced += extent_num_bytes;
10273         }
10274         owner = btrfs_header_owner(eb);
10275
10276         /* Check the extent item of the file extent in extent tree */
10277         btrfs_init_path(&path);
10278         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10279         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10280         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10281
10282         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10283         if (ret)
10284                 goto out;
10285
10286         leaf = path.nodes[0];
10287         slot = path.slots[0];
10288         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10289
10290         extent_flags = btrfs_extent_flags(leaf, ei);
10291
10292         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10293                 error(
10294                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10295                     disk_bytenr, disk_num_bytes,
10296                     BTRFS_EXTENT_FLAG_DATA);
10297                 err |= BACKREF_MISMATCH;
10298         }
10299
10300         /* Check data backref inside that extent item */
10301         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10302         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10303         ptr = (unsigned long)iref;
10304         end = (unsigned long)ei + item_size;
10305         while (ptr < end) {
10306                 iref = (struct btrfs_extent_inline_ref *)ptr;
10307                 type = btrfs_extent_inline_ref_type(leaf, iref);
10308                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10309
10310                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10311                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10312                         if (ref_root == owner || ref_root == root->objectid)
10313                                 found_dbackref = 1;
10314                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10315                         found_dbackref = !check_tree_block_ref(root, NULL,
10316                                 btrfs_extent_inline_ref_offset(leaf, iref),
10317                                 0, owner);
10318                 }
10319
10320                 if (found_dbackref)
10321                         break;
10322                 ptr += btrfs_extent_inline_ref_size(type);
10323         }
10324
10325         if (!found_dbackref) {
10326                 btrfs_release_path(&path);
10327
10328                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10329                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10330                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10331                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10332                                 fi_key.objectid, fi_key.offset);
10333
10334                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10335                                         &dbref_key, &path, 0, 0);
10336                 if (!ret) {
10337                         found_dbackref = 1;
10338                         goto out;
10339                 }
10340
10341                 btrfs_release_path(&path);
10342
10343                 /*
10344                  * Neither inlined nor EXTENT_DATA_REF found, try
10345                  * SHARED_DATA_REF as last chance.
10346                  */
10347                 dbref_key.objectid = disk_bytenr;
10348                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10349                 dbref_key.offset = eb->start;
10350
10351                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10352                                         &dbref_key, &path, 0, 0);
10353                 if (!ret) {
10354                         found_dbackref = 1;
10355                         goto out;
10356                 }
10357         }
10358
10359 out:
10360         if (!found_dbackref)
10361                 err |= BACKREF_MISSING;
10362         btrfs_release_path(&path);
10363         if (err & BACKREF_MISSING) {
10364                 error("data extent[%llu %llu] backref lost",
10365                       disk_bytenr, disk_num_bytes);
10366         }
10367         return err;
10368 }
10369
10370 /*
10371  * Get real tree block level for the case like shared block
10372  * Return >= 0 as tree level
10373  * Return <0 for error
10374  */
10375 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10376 {
10377         struct extent_buffer *eb;
10378         struct btrfs_path path;
10379         struct btrfs_key key;
10380         struct btrfs_extent_item *ei;
10381         u64 flags;
10382         u64 transid;
10383         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10384         u8 backref_level;
10385         u8 header_level;
10386         int ret;
10387
10388         /* Search extent tree for extent generation and level */
10389         key.objectid = bytenr;
10390         key.type = BTRFS_METADATA_ITEM_KEY;
10391         key.offset = (u64)-1;
10392
10393         btrfs_init_path(&path);
10394         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10395         if (ret < 0)
10396                 goto release_out;
10397         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10398         if (ret < 0)
10399                 goto release_out;
10400         if (ret > 0) {
10401                 ret = -ENOENT;
10402                 goto release_out;
10403         }
10404
10405         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10406         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10407                             struct btrfs_extent_item);
10408         flags = btrfs_extent_flags(path.nodes[0], ei);
10409         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10410                 ret = -ENOENT;
10411                 goto release_out;
10412         }
10413
10414         /* Get transid for later read_tree_block() check */
10415         transid = btrfs_extent_generation(path.nodes[0], ei);
10416
10417         /* Get backref level as one source */
10418         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10419                 backref_level = key.offset;
10420         } else {
10421                 struct btrfs_tree_block_info *info;
10422
10423                 info = (struct btrfs_tree_block_info *)(ei + 1);
10424                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10425         }
10426         btrfs_release_path(&path);
10427
10428         /* Get level from tree block as an alternative source */
10429         eb = read_tree_block(fs_info, bytenr, nodesize, transid);
10430         if (!extent_buffer_uptodate(eb)) {
10431                 free_extent_buffer(eb);
10432                 return -EIO;
10433         }
10434         header_level = btrfs_header_level(eb);
10435         free_extent_buffer(eb);
10436
10437         if (header_level != backref_level)
10438                 return -EIO;
10439         return header_level;
10440
10441 release_out:
10442         btrfs_release_path(&path);
10443         return ret;
10444 }
10445
10446 /*
10447  * Check if a tree block backref is valid (points to a valid tree block)
10448  * if level == -1, level will be resolved
10449  * Return >0 for any error found and print error message
10450  */
10451 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10452                                     u64 bytenr, int level)
10453 {
10454         struct btrfs_root *root;
10455         struct btrfs_key key;
10456         struct btrfs_path path;
10457         struct extent_buffer *eb;
10458         struct extent_buffer *node;
10459         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10460         int err = 0;
10461         int ret;
10462
10463         /* Query level for level == -1 special case */
10464         if (level == -1)
10465                 level = query_tree_block_level(fs_info, bytenr);
10466         if (level < 0) {
10467                 err |= REFERENCER_MISSING;
10468                 goto out;
10469         }
10470
10471         key.objectid = root_id;
10472         key.type = BTRFS_ROOT_ITEM_KEY;
10473         key.offset = (u64)-1;
10474
10475         root = btrfs_read_fs_root(fs_info, &key);
10476         if (IS_ERR(root)) {
10477                 err |= REFERENCER_MISSING;
10478                 goto out;
10479         }
10480
10481         /* Read out the tree block to get item/node key */
10482         eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10483         if (!extent_buffer_uptodate(eb)) {
10484                 err |= REFERENCER_MISSING;
10485                 free_extent_buffer(eb);
10486                 goto out;
10487         }
10488
10489         /* Empty tree, no need to check key */
10490         if (!btrfs_header_nritems(eb) && !level) {
10491                 free_extent_buffer(eb);
10492                 goto out;
10493         }
10494
10495         if (level)
10496                 btrfs_node_key_to_cpu(eb, &key, 0);
10497         else
10498                 btrfs_item_key_to_cpu(eb, &key, 0);
10499
10500         free_extent_buffer(eb);
10501
10502         btrfs_init_path(&path);
10503         path.lowest_level = level;
10504         /* Search with the first key, to ensure we can reach it */
10505         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10506         if (ret < 0) {
10507                 err |= REFERENCER_MISSING;
10508                 goto release_out;
10509         }
10510
10511         node = path.nodes[level];
10512         if (btrfs_header_bytenr(node) != bytenr) {
10513                 error(
10514         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10515                         bytenr, nodesize, bytenr,
10516                         btrfs_header_bytenr(node));
10517                 err |= REFERENCER_MISMATCH;
10518         }
10519         if (btrfs_header_level(node) != level) {
10520                 error(
10521         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10522                         bytenr, nodesize, level,
10523                         btrfs_header_level(node));
10524                 err |= REFERENCER_MISMATCH;
10525         }
10526
10527 release_out:
10528         btrfs_release_path(&path);
10529 out:
10530         if (err & REFERENCER_MISSING) {
10531                 if (level < 0)
10532                         error("extent [%llu %d] lost referencer (owner: %llu)",
10533                                 bytenr, nodesize, root_id);
10534                 else
10535                         error(
10536                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10537                                 bytenr, nodesize, root_id, level);
10538         }
10539
10540         return err;
10541 }
10542
10543 /*
10544  * Check if tree block @eb is tree reloc root.
10545  * Return 0 if it's not or any problem happens
10546  * Return 1 if it's a tree reloc root
10547  */
10548 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10549                                  struct extent_buffer *eb)
10550 {
10551         struct btrfs_root *tree_reloc_root;
10552         struct btrfs_key key;
10553         u64 bytenr = btrfs_header_bytenr(eb);
10554         u64 owner = btrfs_header_owner(eb);
10555         int ret = 0;
10556
10557         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10558         key.offset = owner;
10559         key.type = BTRFS_ROOT_ITEM_KEY;
10560
10561         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10562         if (IS_ERR(tree_reloc_root))
10563                 return 0;
10564
10565         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10566                 ret = 1;
10567         btrfs_free_fs_root(tree_reloc_root);
10568         return ret;
10569 }
10570
10571 /*
10572  * Check referencer for shared block backref
10573  * If level == -1, this function will resolve the level.
10574  */
10575 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10576                                      u64 parent, u64 bytenr, int level)
10577 {
10578         struct extent_buffer *eb;
10579         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10580         u32 nr;
10581         int found_parent = 0;
10582         int i;
10583
10584         eb = read_tree_block(fs_info, parent, nodesize, 0);
10585         if (!extent_buffer_uptodate(eb))
10586                 goto out;
10587
10588         if (level == -1)
10589                 level = query_tree_block_level(fs_info, bytenr);
10590         if (level < 0)
10591                 goto out;
10592
10593         /* It's possible it's a tree reloc root */
10594         if (parent == bytenr) {
10595                 if (is_tree_reloc_root(fs_info, eb))
10596                         found_parent = 1;
10597                 goto out;
10598         }
10599
10600         if (level + 1 != btrfs_header_level(eb))
10601                 goto out;
10602
10603         nr = btrfs_header_nritems(eb);
10604         for (i = 0; i < nr; i++) {
10605                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10606                         found_parent = 1;
10607                         break;
10608                 }
10609         }
10610 out:
10611         free_extent_buffer(eb);
10612         if (!found_parent) {
10613                 error(
10614         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10615                         bytenr, nodesize, parent, level);
10616                 return REFERENCER_MISSING;
10617         }
10618         return 0;
10619 }
10620
10621 /*
10622  * Check referencer for normal (inlined) data ref
10623  * If len == 0, it will be resolved by searching in extent tree
10624  */
10625 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10626                                      u64 root_id, u64 objectid, u64 offset,
10627                                      u64 bytenr, u64 len, u32 count)
10628 {
10629         struct btrfs_root *root;
10630         struct btrfs_root *extent_root = fs_info->extent_root;
10631         struct btrfs_key key;
10632         struct btrfs_path path;
10633         struct extent_buffer *leaf;
10634         struct btrfs_file_extent_item *fi;
10635         u32 found_count = 0;
10636         int slot;
10637         int ret = 0;
10638
10639         if (!len) {
10640                 key.objectid = bytenr;
10641                 key.type = BTRFS_EXTENT_ITEM_KEY;
10642                 key.offset = (u64)-1;
10643
10644                 btrfs_init_path(&path);
10645                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10646                 if (ret < 0)
10647                         goto out;
10648                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10649                 if (ret)
10650                         goto out;
10651                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10652                 if (key.objectid != bytenr ||
10653                     key.type != BTRFS_EXTENT_ITEM_KEY)
10654                         goto out;
10655                 len = key.offset;
10656                 btrfs_release_path(&path);
10657         }
10658         key.objectid = root_id;
10659         key.type = BTRFS_ROOT_ITEM_KEY;
10660         key.offset = (u64)-1;
10661         btrfs_init_path(&path);
10662
10663         root = btrfs_read_fs_root(fs_info, &key);
10664         if (IS_ERR(root))
10665                 goto out;
10666
10667         key.objectid = objectid;
10668         key.type = BTRFS_EXTENT_DATA_KEY;
10669         /*
10670          * It can be nasty as data backref offset is
10671          * file offset - file extent offset, which is smaller or
10672          * equal to original backref offset.  The only special case is
10673          * overflow.  So we need to special check and do further search.
10674          */
10675         key.offset = offset & (1ULL << 63) ? 0 : offset;
10676
10677         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10678         if (ret < 0)
10679                 goto out;
10680
10681         /*
10682          * Search afterwards to get correct one
10683          * NOTE: As we must do a comprehensive check on the data backref to
10684          * make sure the dref count also matches, we must iterate all file
10685          * extents for that inode.
10686          */
10687         while (1) {
10688                 leaf = path.nodes[0];
10689                 slot = path.slots[0];
10690
10691                 if (slot >= btrfs_header_nritems(leaf))
10692                         goto next;
10693                 btrfs_item_key_to_cpu(leaf, &key, slot);
10694                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10695                         break;
10696                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10697                 /*
10698                  * Except normal disk bytenr and disk num bytes, we still
10699                  * need to do extra check on dbackref offset as
10700                  * dbackref offset = file_offset - file_extent_offset
10701                  */
10702                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10703                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10704                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10705                     offset)
10706                         found_count++;
10707
10708 next:
10709                 ret = btrfs_next_item(root, &path);
10710                 if (ret)
10711                         break;
10712         }
10713 out:
10714         btrfs_release_path(&path);
10715         if (found_count != count) {
10716                 error(
10717 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10718                         bytenr, len, root_id, objectid, offset, count, found_count);
10719                 return REFERENCER_MISSING;
10720         }
10721         return 0;
10722 }
10723
10724 /*
10725  * Check if the referencer of a shared data backref exists
10726  */
10727 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10728                                      u64 parent, u64 bytenr)
10729 {
10730         struct extent_buffer *eb;
10731         struct btrfs_key key;
10732         struct btrfs_file_extent_item *fi;
10733         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10734         u32 nr;
10735         int found_parent = 0;
10736         int i;
10737
10738         eb = read_tree_block(fs_info, parent, nodesize, 0);
10739         if (!extent_buffer_uptodate(eb))
10740                 goto out;
10741
10742         nr = btrfs_header_nritems(eb);
10743         for (i = 0; i < nr; i++) {
10744                 btrfs_item_key_to_cpu(eb, &key, i);
10745                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10746                         continue;
10747
10748                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10749                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10750                         continue;
10751
10752                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10753                         found_parent = 1;
10754                         break;
10755                 }
10756         }
10757
10758 out:
10759         free_extent_buffer(eb);
10760         if (!found_parent) {
10761                 error("shared extent %llu referencer lost (parent: %llu)",
10762                         bytenr, parent);
10763                 return REFERENCER_MISSING;
10764         }
10765         return 0;
10766 }
10767
10768 /*
10769  * This function will check a given extent item, including its backref and
10770  * itself (like crossing stripe boundary and type)
10771  *
10772  * Since we don't use extent_record anymore, introduce new error bit
10773  */
10774 static int check_extent_item(struct btrfs_fs_info *fs_info,
10775                              struct extent_buffer *eb, int slot)
10776 {
10777         struct btrfs_extent_item *ei;
10778         struct btrfs_extent_inline_ref *iref;
10779         struct btrfs_extent_data_ref *dref;
10780         unsigned long end;
10781         unsigned long ptr;
10782         int type;
10783         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10784         u32 item_size = btrfs_item_size_nr(eb, slot);
10785         u64 flags;
10786         u64 offset;
10787         int metadata = 0;
10788         int level;
10789         struct btrfs_key key;
10790         int ret;
10791         int err = 0;
10792
10793         btrfs_item_key_to_cpu(eb, &key, slot);
10794         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10795                 bytes_used += key.offset;
10796         else
10797                 bytes_used += nodesize;
10798
10799         if (item_size < sizeof(*ei)) {
10800                 /*
10801                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10802                  * old thing when on disk format is still un-determined.
10803                  * No need to care about it anymore
10804                  */
10805                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10806                 return -ENOTTY;
10807         }
10808
10809         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10810         flags = btrfs_extent_flags(eb, ei);
10811
10812         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10813                 metadata = 1;
10814         if (metadata && check_crossing_stripes(global_info, key.objectid,
10815                                                eb->len)) {
10816                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10817                       key.objectid, key.objectid + nodesize);
10818                 err |= CROSSING_STRIPE_BOUNDARY;
10819         }
10820
10821         ptr = (unsigned long)(ei + 1);
10822
10823         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10824                 /* Old EXTENT_ITEM metadata */
10825                 struct btrfs_tree_block_info *info;
10826
10827                 info = (struct btrfs_tree_block_info *)ptr;
10828                 level = btrfs_tree_block_level(eb, info);
10829                 ptr += sizeof(struct btrfs_tree_block_info);
10830         } else {
10831                 /* New METADATA_ITEM */
10832                 level = key.offset;
10833         }
10834         end = (unsigned long)ei + item_size;
10835
10836 next:
10837         /* Reached extent item end normally */
10838         if (ptr == end)
10839                 goto out;
10840
10841         /* Beyond extent item end, wrong item size */
10842         if (ptr > end) {
10843                 err |= ITEM_SIZE_MISMATCH;
10844                 error("extent item at bytenr %llu slot %d has wrong size",
10845                         eb->start, slot);
10846                 goto out;
10847         }
10848
10849         /* Now check every backref in this extent item */
10850         iref = (struct btrfs_extent_inline_ref *)ptr;
10851         type = btrfs_extent_inline_ref_type(eb, iref);
10852         offset = btrfs_extent_inline_ref_offset(eb, iref);
10853         switch (type) {
10854         case BTRFS_TREE_BLOCK_REF_KEY:
10855                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10856                                                level);
10857                 err |= ret;
10858                 break;
10859         case BTRFS_SHARED_BLOCK_REF_KEY:
10860                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10861                                                  level);
10862                 err |= ret;
10863                 break;
10864         case BTRFS_EXTENT_DATA_REF_KEY:
10865                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10866                 ret = check_extent_data_backref(fs_info,
10867                                 btrfs_extent_data_ref_root(eb, dref),
10868                                 btrfs_extent_data_ref_objectid(eb, dref),
10869                                 btrfs_extent_data_ref_offset(eb, dref),
10870                                 key.objectid, key.offset,
10871                                 btrfs_extent_data_ref_count(eb, dref));
10872                 err |= ret;
10873                 break;
10874         case BTRFS_SHARED_DATA_REF_KEY:
10875                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10876                 err |= ret;
10877                 break;
10878         default:
10879                 error("extent[%llu %d %llu] has unknown ref type: %d",
10880                         key.objectid, key.type, key.offset, type);
10881                 err |= UNKNOWN_TYPE;
10882                 goto out;
10883         }
10884
10885         ptr += btrfs_extent_inline_ref_size(type);
10886         goto next;
10887
10888 out:
10889         return err;
10890 }
10891
10892 /*
10893  * Check if a dev extent item is referred correctly by its chunk
10894  */
10895 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10896                                  struct extent_buffer *eb, int slot)
10897 {
10898         struct btrfs_root *chunk_root = fs_info->chunk_root;
10899         struct btrfs_dev_extent *ptr;
10900         struct btrfs_path path;
10901         struct btrfs_key chunk_key;
10902         struct btrfs_key devext_key;
10903         struct btrfs_chunk *chunk;
10904         struct extent_buffer *l;
10905         int num_stripes;
10906         u64 length;
10907         int i;
10908         int found_chunk = 0;
10909         int ret;
10910
10911         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10912         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10913         length = btrfs_dev_extent_length(eb, ptr);
10914
10915         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10916         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10917         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10918
10919         btrfs_init_path(&path);
10920         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10921         if (ret)
10922                 goto out;
10923
10924         l = path.nodes[0];
10925         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10926         if (btrfs_chunk_length(l, chunk) != length)
10927                 goto out;
10928
10929         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10930         for (i = 0; i < num_stripes; i++) {
10931                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10932                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10933
10934                 if (devid == devext_key.objectid &&
10935                     offset == devext_key.offset) {
10936                         found_chunk = 1;
10937                         break;
10938                 }
10939         }
10940 out:
10941         btrfs_release_path(&path);
10942         if (!found_chunk) {
10943                 error(
10944                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10945                         devext_key.objectid, devext_key.offset, length);
10946                 return REFERENCER_MISSING;
10947         }
10948         return 0;
10949 }
10950
10951 /*
10952  * Check if the used space is correct with the dev item
10953  */
10954 static int check_dev_item(struct btrfs_fs_info *fs_info,
10955                           struct extent_buffer *eb, int slot)
10956 {
10957         struct btrfs_root *dev_root = fs_info->dev_root;
10958         struct btrfs_dev_item *dev_item;
10959         struct btrfs_path path;
10960         struct btrfs_key key;
10961         struct btrfs_dev_extent *ptr;
10962         u64 dev_id;
10963         u64 used;
10964         u64 total = 0;
10965         int ret;
10966
10967         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10968         dev_id = btrfs_device_id(eb, dev_item);
10969         used = btrfs_device_bytes_used(eb, dev_item);
10970
10971         key.objectid = dev_id;
10972         key.type = BTRFS_DEV_EXTENT_KEY;
10973         key.offset = 0;
10974
10975         btrfs_init_path(&path);
10976         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10977         if (ret < 0) {
10978                 btrfs_item_key_to_cpu(eb, &key, slot);
10979                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10980                         key.objectid, key.type, key.offset);
10981                 btrfs_release_path(&path);
10982                 return REFERENCER_MISSING;
10983         }
10984
10985         /* Iterate dev_extents to calculate the used space of a device */
10986         while (1) {
10987                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10988                         goto next;
10989
10990                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10991                 if (key.objectid > dev_id)
10992                         break;
10993                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10994                         goto next;
10995
10996                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10997                                      struct btrfs_dev_extent);
10998                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10999 next:
11000                 ret = btrfs_next_item(dev_root, &path);
11001                 if (ret)
11002                         break;
11003         }
11004         btrfs_release_path(&path);
11005
11006         if (used != total) {
11007                 btrfs_item_key_to_cpu(eb, &key, slot);
11008                 error(
11009 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11010                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11011                         BTRFS_DEV_EXTENT_KEY, dev_id);
11012                 return ACCOUNTING_MISMATCH;
11013         }
11014         return 0;
11015 }
11016
11017 /*
11018  * Check a block group item with its referener (chunk) and its used space
11019  * with extent/metadata item
11020  */
11021 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11022                                   struct extent_buffer *eb, int slot)
11023 {
11024         struct btrfs_root *extent_root = fs_info->extent_root;
11025         struct btrfs_root *chunk_root = fs_info->chunk_root;
11026         struct btrfs_block_group_item *bi;
11027         struct btrfs_block_group_item bg_item;
11028         struct btrfs_path path;
11029         struct btrfs_key bg_key;
11030         struct btrfs_key chunk_key;
11031         struct btrfs_key extent_key;
11032         struct btrfs_chunk *chunk;
11033         struct extent_buffer *leaf;
11034         struct btrfs_extent_item *ei;
11035         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11036         u64 flags;
11037         u64 bg_flags;
11038         u64 used;
11039         u64 total = 0;
11040         int ret;
11041         int err = 0;
11042
11043         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11044         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11045         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11046         used = btrfs_block_group_used(&bg_item);
11047         bg_flags = btrfs_block_group_flags(&bg_item);
11048
11049         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11050         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11051         chunk_key.offset = bg_key.objectid;
11052
11053         btrfs_init_path(&path);
11054         /* Search for the referencer chunk */
11055         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11056         if (ret) {
11057                 error(
11058                 "block group[%llu %llu] did not find the related chunk item",
11059                         bg_key.objectid, bg_key.offset);
11060                 err |= REFERENCER_MISSING;
11061         } else {
11062                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11063                                         struct btrfs_chunk);
11064                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11065                                                 bg_key.offset) {
11066                         error(
11067         "block group[%llu %llu] related chunk item length does not match",
11068                                 bg_key.objectid, bg_key.offset);
11069                         err |= REFERENCER_MISMATCH;
11070                 }
11071         }
11072         btrfs_release_path(&path);
11073
11074         /* Search from the block group bytenr */
11075         extent_key.objectid = bg_key.objectid;
11076         extent_key.type = 0;
11077         extent_key.offset = 0;
11078
11079         btrfs_init_path(&path);
11080         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11081         if (ret < 0)
11082                 goto out;
11083
11084         /* Iterate extent tree to account used space */
11085         while (1) {
11086                 leaf = path.nodes[0];
11087
11088                 /* Search slot can point to the last item beyond leaf nritems */
11089                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11090                         goto next;
11091
11092                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11093                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11094                         break;
11095
11096                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11097                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11098                         goto next;
11099                 if (extent_key.objectid < bg_key.objectid)
11100                         goto next;
11101
11102                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11103                         total += nodesize;
11104                 else
11105                         total += extent_key.offset;
11106
11107                 ei = btrfs_item_ptr(leaf, path.slots[0],
11108                                     struct btrfs_extent_item);
11109                 flags = btrfs_extent_flags(leaf, ei);
11110                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11111                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11112                                 error(
11113                         "bad extent[%llu, %llu) type mismatch with chunk",
11114                                         extent_key.objectid,
11115                                         extent_key.objectid + extent_key.offset);
11116                                 err |= CHUNK_TYPE_MISMATCH;
11117                         }
11118                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11119                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11120                                     BTRFS_BLOCK_GROUP_METADATA))) {
11121                                 error(
11122                         "bad extent[%llu, %llu) type mismatch with chunk",
11123                                         extent_key.objectid,
11124                                         extent_key.objectid + nodesize);
11125                                 err |= CHUNK_TYPE_MISMATCH;
11126                         }
11127                 }
11128 next:
11129                 ret = btrfs_next_item(extent_root, &path);
11130                 if (ret)
11131                         break;
11132         }
11133
11134 out:
11135         btrfs_release_path(&path);
11136
11137         if (total != used) {
11138                 error(
11139                 "block group[%llu %llu] used %llu but extent items used %llu",
11140                         bg_key.objectid, bg_key.offset, used, total);
11141                 err |= ACCOUNTING_MISMATCH;
11142         }
11143         return err;
11144 }
11145
11146 /*
11147  * Check a chunk item.
11148  * Including checking all referred dev_extents and block group
11149  */
11150 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11151                             struct extent_buffer *eb, int slot)
11152 {
11153         struct btrfs_root *extent_root = fs_info->extent_root;
11154         struct btrfs_root *dev_root = fs_info->dev_root;
11155         struct btrfs_path path;
11156         struct btrfs_key chunk_key;
11157         struct btrfs_key bg_key;
11158         struct btrfs_key devext_key;
11159         struct btrfs_chunk *chunk;
11160         struct extent_buffer *leaf;
11161         struct btrfs_block_group_item *bi;
11162         struct btrfs_block_group_item bg_item;
11163         struct btrfs_dev_extent *ptr;
11164         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11165         u64 length;
11166         u64 chunk_end;
11167         u64 type;
11168         u64 profile;
11169         int num_stripes;
11170         u64 offset;
11171         u64 objectid;
11172         int i;
11173         int ret;
11174         int err = 0;
11175
11176         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11177         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11178         length = btrfs_chunk_length(eb, chunk);
11179         chunk_end = chunk_key.offset + length;
11180         if (!IS_ALIGNED(length, sectorsize)) {
11181                 error("chunk[%llu %llu) not aligned to %u",
11182                         chunk_key.offset, chunk_end, sectorsize);
11183                 err |= BYTES_UNALIGNED;
11184                 goto out;
11185         }
11186
11187         type = btrfs_chunk_type(eb, chunk);
11188         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11189         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11190                 error("chunk[%llu %llu) has no chunk type",
11191                         chunk_key.offset, chunk_end);
11192                 err |= UNKNOWN_TYPE;
11193         }
11194         if (profile && (profile & (profile - 1))) {
11195                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11196                         chunk_key.offset, chunk_end, profile);
11197                 err |= UNKNOWN_TYPE;
11198         }
11199
11200         bg_key.objectid = chunk_key.offset;
11201         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11202         bg_key.offset = length;
11203
11204         btrfs_init_path(&path);
11205         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11206         if (ret) {
11207                 error(
11208                 "chunk[%llu %llu) did not find the related block group item",
11209                         chunk_key.offset, chunk_end);
11210                 err |= REFERENCER_MISSING;
11211         } else{
11212                 leaf = path.nodes[0];
11213                 bi = btrfs_item_ptr(leaf, path.slots[0],
11214                                     struct btrfs_block_group_item);
11215                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11216                                    sizeof(bg_item));
11217                 if (btrfs_block_group_flags(&bg_item) != type) {
11218                         error(
11219 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11220                                 chunk_key.offset, chunk_end, type,
11221                                 btrfs_block_group_flags(&bg_item));
11222                         err |= REFERENCER_MISSING;
11223                 }
11224         }
11225
11226         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11227         for (i = 0; i < num_stripes; i++) {
11228                 btrfs_release_path(&path);
11229                 btrfs_init_path(&path);
11230                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11231                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11232                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11233
11234                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11235                                         0, 0);
11236                 if (ret)
11237                         goto not_match_dev;
11238
11239                 leaf = path.nodes[0];
11240                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11241                                      struct btrfs_dev_extent);
11242                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11243                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11244                 if (objectid != chunk_key.objectid ||
11245                     offset != chunk_key.offset ||
11246                     btrfs_dev_extent_length(leaf, ptr) != length)
11247                         goto not_match_dev;
11248                 continue;
11249 not_match_dev:
11250                 err |= BACKREF_MISSING;
11251                 error(
11252                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11253                         chunk_key.objectid, chunk_end, i);
11254                 continue;
11255         }
11256         btrfs_release_path(&path);
11257 out:
11258         return err;
11259 }
11260
11261 /*
11262  * Main entry function to check known items and update related accounting info
11263  */
11264 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11265 {
11266         struct btrfs_fs_info *fs_info = root->fs_info;
11267         struct btrfs_key key;
11268         int slot = 0;
11269         int type;
11270         struct btrfs_extent_data_ref *dref;
11271         int ret;
11272         int err = 0;
11273
11274 next:
11275         btrfs_item_key_to_cpu(eb, &key, slot);
11276         type = key.type;
11277
11278         switch (type) {
11279         case BTRFS_EXTENT_DATA_KEY:
11280                 ret = check_extent_data_item(root, eb, slot);
11281                 err |= ret;
11282                 break;
11283         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11284                 ret = check_block_group_item(fs_info, eb, slot);
11285                 err |= ret;
11286                 break;
11287         case BTRFS_DEV_ITEM_KEY:
11288                 ret = check_dev_item(fs_info, eb, slot);
11289                 err |= ret;
11290                 break;
11291         case BTRFS_CHUNK_ITEM_KEY:
11292                 ret = check_chunk_item(fs_info, eb, slot);
11293                 err |= ret;
11294                 break;
11295         case BTRFS_DEV_EXTENT_KEY:
11296                 ret = check_dev_extent_item(fs_info, eb, slot);
11297                 err |= ret;
11298                 break;
11299         case BTRFS_EXTENT_ITEM_KEY:
11300         case BTRFS_METADATA_ITEM_KEY:
11301                 ret = check_extent_item(fs_info, eb, slot);
11302                 err |= ret;
11303                 break;
11304         case BTRFS_EXTENT_CSUM_KEY:
11305                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11306                 break;
11307         case BTRFS_TREE_BLOCK_REF_KEY:
11308                 ret = check_tree_block_backref(fs_info, key.offset,
11309                                                key.objectid, -1);
11310                 err |= ret;
11311                 break;
11312         case BTRFS_EXTENT_DATA_REF_KEY:
11313                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11314                 ret = check_extent_data_backref(fs_info,
11315                                 btrfs_extent_data_ref_root(eb, dref),
11316                                 btrfs_extent_data_ref_objectid(eb, dref),
11317                                 btrfs_extent_data_ref_offset(eb, dref),
11318                                 key.objectid, 0,
11319                                 btrfs_extent_data_ref_count(eb, dref));
11320                 err |= ret;
11321                 break;
11322         case BTRFS_SHARED_BLOCK_REF_KEY:
11323                 ret = check_shared_block_backref(fs_info, key.offset,
11324                                                  key.objectid, -1);
11325                 err |= ret;
11326                 break;
11327         case BTRFS_SHARED_DATA_REF_KEY:
11328                 ret = check_shared_data_backref(fs_info, key.offset,
11329                                                 key.objectid);
11330                 err |= ret;
11331                 break;
11332         default:
11333                 break;
11334         }
11335
11336         if (++slot < btrfs_header_nritems(eb))
11337                 goto next;
11338
11339         return err;
11340 }
11341
11342 /*
11343  * Helper function for later fs/subvol tree check.  To determine if a tree
11344  * block should be checked.
11345  * This function will ensure only the direct referencer with lowest rootid to
11346  * check a fs/subvolume tree block.
11347  *
11348  * Backref check at extent tree would detect errors like missing subvolume
11349  * tree, so we can do aggressive check to reduce duplicated checks.
11350  */
11351 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11352 {
11353         struct btrfs_root *extent_root = root->fs_info->extent_root;
11354         struct btrfs_key key;
11355         struct btrfs_path path;
11356         struct extent_buffer *leaf;
11357         int slot;
11358         struct btrfs_extent_item *ei;
11359         unsigned long ptr;
11360         unsigned long end;
11361         int type;
11362         u32 item_size;
11363         u64 offset;
11364         struct btrfs_extent_inline_ref *iref;
11365         int ret;
11366
11367         btrfs_init_path(&path);
11368         key.objectid = btrfs_header_bytenr(eb);
11369         key.type = BTRFS_METADATA_ITEM_KEY;
11370         key.offset = (u64)-1;
11371
11372         /*
11373          * Any failure in backref resolving means we can't determine
11374          * whom the tree block belongs to.
11375          * So in that case, we need to check that tree block
11376          */
11377         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11378         if (ret < 0)
11379                 goto need_check;
11380
11381         ret = btrfs_previous_extent_item(extent_root, &path,
11382                                          btrfs_header_bytenr(eb));
11383         if (ret)
11384                 goto need_check;
11385
11386         leaf = path.nodes[0];
11387         slot = path.slots[0];
11388         btrfs_item_key_to_cpu(leaf, &key, slot);
11389         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11390
11391         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11392                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11393         } else {
11394                 struct btrfs_tree_block_info *info;
11395
11396                 info = (struct btrfs_tree_block_info *)(ei + 1);
11397                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11398         }
11399
11400         item_size = btrfs_item_size_nr(leaf, slot);
11401         ptr = (unsigned long)iref;
11402         end = (unsigned long)ei + item_size;
11403         while (ptr < end) {
11404                 iref = (struct btrfs_extent_inline_ref *)ptr;
11405                 type = btrfs_extent_inline_ref_type(leaf, iref);
11406                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11407
11408                 /*
11409                  * We only check the tree block if current root is
11410                  * the lowest referencer of it.
11411                  */
11412                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11413                     offset < root->objectid) {
11414                         btrfs_release_path(&path);
11415                         return 0;
11416                 }
11417
11418                 ptr += btrfs_extent_inline_ref_size(type);
11419         }
11420         /*
11421          * Normally we should also check keyed tree block ref, but that may be
11422          * very time consuming.  Inlined ref should already make us skip a lot
11423          * of refs now.  So skip search keyed tree block ref.
11424          */
11425
11426 need_check:
11427         btrfs_release_path(&path);
11428         return 1;
11429 }
11430
11431 /*
11432  * Traversal function for tree block. We will do:
11433  * 1) Skip shared fs/subvolume tree blocks
11434  * 2) Update related bytes accounting
11435  * 3) Pre-order traversal
11436  */
11437 static int traverse_tree_block(struct btrfs_root *root,
11438                                 struct extent_buffer *node)
11439 {
11440         struct extent_buffer *eb;
11441         struct btrfs_key key;
11442         struct btrfs_key drop_key;
11443         int level;
11444         u64 nr;
11445         int i;
11446         int err = 0;
11447         int ret;
11448
11449         /*
11450          * Skip shared fs/subvolume tree block, in that case they will
11451          * be checked by referencer with lowest rootid
11452          */
11453         if (is_fstree(root->objectid) && !should_check(root, node))
11454                 return 0;
11455
11456         /* Update bytes accounting */
11457         total_btree_bytes += node->len;
11458         if (fs_root_objectid(btrfs_header_owner(node)))
11459                 total_fs_tree_bytes += node->len;
11460         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11461                 total_extent_tree_bytes += node->len;
11462         if (!found_old_backref &&
11463             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11464             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11465             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11466                 found_old_backref = 1;
11467
11468         /* pre-order tranversal, check itself first */
11469         level = btrfs_header_level(node);
11470         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11471                                    btrfs_header_level(node),
11472                                    btrfs_header_owner(node));
11473         err |= ret;
11474         if (err)
11475                 error(
11476         "check %s failed root %llu bytenr %llu level %d, force continue check",
11477                         level ? "node":"leaf", root->objectid,
11478                         btrfs_header_bytenr(node), btrfs_header_level(node));
11479
11480         if (!level) {
11481                 btree_space_waste += btrfs_leaf_free_space(root, node);
11482                 ret = check_leaf_items(root, node);
11483                 err |= ret;
11484                 return err;
11485         }
11486
11487         nr = btrfs_header_nritems(node);
11488         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11489         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11490                 sizeof(struct btrfs_key_ptr);
11491
11492         /* Then check all its children */
11493         for (i = 0; i < nr; i++) {
11494                 u64 blocknr = btrfs_node_blockptr(node, i);
11495
11496                 btrfs_node_key_to_cpu(node, &key, i);
11497                 if (level == root->root_item.drop_level &&
11498                     is_dropped_key(&key, &drop_key))
11499                         continue;
11500
11501                 /*
11502                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11503                  * to call the function itself.
11504                  */
11505                 eb = read_tree_block(root->fs_info, blocknr,
11506                                 root->fs_info->nodesize, 0);
11507                 if (extent_buffer_uptodate(eb)) {
11508                         ret = traverse_tree_block(root, eb);
11509                         err |= ret;
11510                 }
11511                 free_extent_buffer(eb);
11512         }
11513
11514         return err;
11515 }
11516
11517 /*
11518  * Low memory usage version check_chunks_and_extents.
11519  */
11520 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11521 {
11522         struct btrfs_path path;
11523         struct btrfs_key key;
11524         struct btrfs_root *root1;
11525         struct btrfs_root *cur_root;
11526         int err = 0;
11527         int ret;
11528
11529         root1 = root->fs_info->chunk_root;
11530         ret = traverse_tree_block(root1, root1->node);
11531         err |= ret;
11532
11533         root1 = root->fs_info->tree_root;
11534         ret = traverse_tree_block(root1, root1->node);
11535         err |= ret;
11536
11537         btrfs_init_path(&path);
11538         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11539         key.offset = 0;
11540         key.type = BTRFS_ROOT_ITEM_KEY;
11541
11542         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11543         if (ret) {
11544                 error("cannot find extent treet in tree_root");
11545                 goto out;
11546         }
11547
11548         while (1) {
11549                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11550                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11551                         goto next;
11552                 key.offset = (u64)-1;
11553
11554                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11555                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11556                                         &key);
11557                 else
11558                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11559                 if (IS_ERR(cur_root) || !cur_root) {
11560                         error("failed to read tree: %lld", key.objectid);
11561                         goto next;
11562                 }
11563
11564                 ret = traverse_tree_block(cur_root, cur_root->node);
11565                 err |= ret;
11566
11567                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11568                         btrfs_free_fs_root(cur_root);
11569 next:
11570                 ret = btrfs_next_item(root1, &path);
11571                 if (ret)
11572                         goto out;
11573         }
11574
11575 out:
11576         btrfs_release_path(&path);
11577         return err;
11578 }
11579
11580 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11581                            struct btrfs_root *root, int overwrite)
11582 {
11583         struct extent_buffer *c;
11584         struct extent_buffer *old = root->node;
11585         int level;
11586         int ret;
11587         struct btrfs_disk_key disk_key = {0,0,0};
11588
11589         level = 0;
11590
11591         if (overwrite) {
11592                 c = old;
11593                 extent_buffer_get(c);
11594                 goto init;
11595         }
11596         c = btrfs_alloc_free_block(trans, root,
11597                                    root->fs_info->nodesize,
11598                                    root->root_key.objectid,
11599                                    &disk_key, level, 0, 0);
11600         if (IS_ERR(c)) {
11601                 c = old;
11602                 extent_buffer_get(c);
11603                 overwrite = 1;
11604         }
11605 init:
11606         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11607         btrfs_set_header_level(c, level);
11608         btrfs_set_header_bytenr(c, c->start);
11609         btrfs_set_header_generation(c, trans->transid);
11610         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11611         btrfs_set_header_owner(c, root->root_key.objectid);
11612
11613         write_extent_buffer(c, root->fs_info->fsid,
11614                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11615
11616         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11617                             btrfs_header_chunk_tree_uuid(c),
11618                             BTRFS_UUID_SIZE);
11619
11620         btrfs_mark_buffer_dirty(c);
11621         /*
11622          * this case can happen in the following case:
11623          *
11624          * 1.overwrite previous root.
11625          *
11626          * 2.reinit reloc data root, this is because we skip pin
11627          * down reloc data tree before which means we can allocate
11628          * same block bytenr here.
11629          */
11630         if (old->start == c->start) {
11631                 btrfs_set_root_generation(&root->root_item,
11632                                           trans->transid);
11633                 root->root_item.level = btrfs_header_level(root->node);
11634                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11635                                         &root->root_key, &root->root_item);
11636                 if (ret) {
11637                         free_extent_buffer(c);
11638                         return ret;
11639                 }
11640         }
11641         free_extent_buffer(old);
11642         root->node = c;
11643         add_root_to_dirty_list(root);
11644         return 0;
11645 }
11646
11647 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11648                                 struct extent_buffer *eb, int tree_root)
11649 {
11650         struct extent_buffer *tmp;
11651         struct btrfs_root_item *ri;
11652         struct btrfs_key key;
11653         u64 bytenr;
11654         u32 nodesize;
11655         int level = btrfs_header_level(eb);
11656         int nritems;
11657         int ret;
11658         int i;
11659
11660         /*
11661          * If we have pinned this block before, don't pin it again.
11662          * This can not only avoid forever loop with broken filesystem
11663          * but also give us some speedups.
11664          */
11665         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11666                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11667                 return 0;
11668
11669         btrfs_pin_extent(fs_info, eb->start, eb->len);
11670
11671         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11672         nritems = btrfs_header_nritems(eb);
11673         for (i = 0; i < nritems; i++) {
11674                 if (level == 0) {
11675                         btrfs_item_key_to_cpu(eb, &key, i);
11676                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11677                                 continue;
11678                         /* Skip the extent root and reloc roots */
11679                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11680                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11681                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11682                                 continue;
11683                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11684                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11685
11686                         /*
11687                          * If at any point we start needing the real root we
11688                          * will have to build a stump root for the root we are
11689                          * in, but for now this doesn't actually use the root so
11690                          * just pass in extent_root.
11691                          */
11692                         tmp = read_tree_block(fs_info, bytenr, nodesize, 0);
11693                         if (!extent_buffer_uptodate(tmp)) {
11694                                 fprintf(stderr, "Error reading root block\n");
11695                                 return -EIO;
11696                         }
11697                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11698                         free_extent_buffer(tmp);
11699                         if (ret)
11700                                 return ret;
11701                 } else {
11702                         bytenr = btrfs_node_blockptr(eb, i);
11703
11704                         /* If we aren't the tree root don't read the block */
11705                         if (level == 1 && !tree_root) {
11706                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11707                                 continue;
11708                         }
11709
11710                         tmp = read_tree_block(fs_info, bytenr,
11711                                               nodesize, 0);
11712                         if (!extent_buffer_uptodate(tmp)) {
11713                                 fprintf(stderr, "Error reading tree block\n");
11714                                 return -EIO;
11715                         }
11716                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11717                         free_extent_buffer(tmp);
11718                         if (ret)
11719                                 return ret;
11720                 }
11721         }
11722
11723         return 0;
11724 }
11725
11726 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11727 {
11728         int ret;
11729
11730         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11731         if (ret)
11732                 return ret;
11733
11734         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11735 }
11736
11737 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11738 {
11739         struct btrfs_block_group_cache *cache;
11740         struct btrfs_path path;
11741         struct extent_buffer *leaf;
11742         struct btrfs_chunk *chunk;
11743         struct btrfs_key key;
11744         int ret;
11745         u64 start;
11746
11747         btrfs_init_path(&path);
11748         key.objectid = 0;
11749         key.type = BTRFS_CHUNK_ITEM_KEY;
11750         key.offset = 0;
11751         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11752         if (ret < 0) {
11753                 btrfs_release_path(&path);
11754                 return ret;
11755         }
11756
11757         /*
11758          * We do this in case the block groups were screwed up and had alloc
11759          * bits that aren't actually set on the chunks.  This happens with
11760          * restored images every time and could happen in real life I guess.
11761          */
11762         fs_info->avail_data_alloc_bits = 0;
11763         fs_info->avail_metadata_alloc_bits = 0;
11764         fs_info->avail_system_alloc_bits = 0;
11765
11766         /* First we need to create the in-memory block groups */
11767         while (1) {
11768                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11769                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11770                         if (ret < 0) {
11771                                 btrfs_release_path(&path);
11772                                 return ret;
11773                         }
11774                         if (ret) {
11775                                 ret = 0;
11776                                 break;
11777                         }
11778                 }
11779                 leaf = path.nodes[0];
11780                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11781                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11782                         path.slots[0]++;
11783                         continue;
11784                 }
11785
11786                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11787                 btrfs_add_block_group(fs_info, 0,
11788                                       btrfs_chunk_type(leaf, chunk),
11789                                       key.objectid, key.offset,
11790                                       btrfs_chunk_length(leaf, chunk));
11791                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11792                                  key.offset + btrfs_chunk_length(leaf, chunk));
11793                 path.slots[0]++;
11794         }
11795         start = 0;
11796         while (1) {
11797                 cache = btrfs_lookup_first_block_group(fs_info, start);
11798                 if (!cache)
11799                         break;
11800                 cache->cached = 1;
11801                 start = cache->key.objectid + cache->key.offset;
11802         }
11803
11804         btrfs_release_path(&path);
11805         return 0;
11806 }
11807
11808 static int reset_balance(struct btrfs_trans_handle *trans,
11809                          struct btrfs_fs_info *fs_info)
11810 {
11811         struct btrfs_root *root = fs_info->tree_root;
11812         struct btrfs_path path;
11813         struct extent_buffer *leaf;
11814         struct btrfs_key key;
11815         int del_slot, del_nr = 0;
11816         int ret;
11817         int found = 0;
11818
11819         btrfs_init_path(&path);
11820         key.objectid = BTRFS_BALANCE_OBJECTID;
11821         key.type = BTRFS_BALANCE_ITEM_KEY;
11822         key.offset = 0;
11823         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11824         if (ret) {
11825                 if (ret > 0)
11826                         ret = 0;
11827                 if (!ret)
11828                         goto reinit_data_reloc;
11829                 else
11830                         goto out;
11831         }
11832
11833         ret = btrfs_del_item(trans, root, &path);
11834         if (ret)
11835                 goto out;
11836         btrfs_release_path(&path);
11837
11838         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11839         key.type = BTRFS_ROOT_ITEM_KEY;
11840         key.offset = 0;
11841         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11842         if (ret < 0)
11843                 goto out;
11844         while (1) {
11845                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11846                         if (!found)
11847                                 break;
11848
11849                         if (del_nr) {
11850                                 ret = btrfs_del_items(trans, root, &path,
11851                                                       del_slot, del_nr);
11852                                 del_nr = 0;
11853                                 if (ret)
11854                                         goto out;
11855                         }
11856                         key.offset++;
11857                         btrfs_release_path(&path);
11858
11859                         found = 0;
11860                         ret = btrfs_search_slot(trans, root, &key, &path,
11861                                                 -1, 1);
11862                         if (ret < 0)
11863                                 goto out;
11864                         continue;
11865                 }
11866                 found = 1;
11867                 leaf = path.nodes[0];
11868                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11869                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11870                         break;
11871                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11872                         path.slots[0]++;
11873                         continue;
11874                 }
11875                 if (!del_nr) {
11876                         del_slot = path.slots[0];
11877                         del_nr = 1;
11878                 } else {
11879                         del_nr++;
11880                 }
11881                 path.slots[0]++;
11882         }
11883
11884         if (del_nr) {
11885                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11886                 if (ret)
11887                         goto out;
11888         }
11889         btrfs_release_path(&path);
11890
11891 reinit_data_reloc:
11892         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11893         key.type = BTRFS_ROOT_ITEM_KEY;
11894         key.offset = (u64)-1;
11895         root = btrfs_read_fs_root(fs_info, &key);
11896         if (IS_ERR(root)) {
11897                 fprintf(stderr, "Error reading data reloc tree\n");
11898                 ret = PTR_ERR(root);
11899                 goto out;
11900         }
11901         record_root_in_trans(trans, root);
11902         ret = btrfs_fsck_reinit_root(trans, root, 0);
11903         if (ret)
11904                 goto out;
11905         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11906 out:
11907         btrfs_release_path(&path);
11908         return ret;
11909 }
11910
11911 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11912                               struct btrfs_fs_info *fs_info)
11913 {
11914         u64 start = 0;
11915         int ret;
11916
11917         /*
11918          * The only reason we don't do this is because right now we're just
11919          * walking the trees we find and pinning down their bytes, we don't look
11920          * at any of the leaves.  In order to do mixed groups we'd have to check
11921          * the leaves of any fs roots and pin down the bytes for any file
11922          * extents we find.  Not hard but why do it if we don't have to?
11923          */
11924         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11925                 fprintf(stderr, "We don't support re-initing the extent tree "
11926                         "for mixed block groups yet, please notify a btrfs "
11927                         "developer you want to do this so they can add this "
11928                         "functionality.\n");
11929                 return -EINVAL;
11930         }
11931
11932         /*
11933          * first we need to walk all of the trees except the extent tree and pin
11934          * down the bytes that are in use so we don't overwrite any existing
11935          * metadata.
11936          */
11937         ret = pin_metadata_blocks(fs_info);
11938         if (ret) {
11939                 fprintf(stderr, "error pinning down used bytes\n");
11940                 return ret;
11941         }
11942
11943         /*
11944          * Need to drop all the block groups since we're going to recreate all
11945          * of them again.
11946          */
11947         btrfs_free_block_groups(fs_info);
11948         ret = reset_block_groups(fs_info);
11949         if (ret) {
11950                 fprintf(stderr, "error resetting the block groups\n");
11951                 return ret;
11952         }
11953
11954         /* Ok we can allocate now, reinit the extent root */
11955         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11956         if (ret) {
11957                 fprintf(stderr, "extent root initialization failed\n");
11958                 /*
11959                  * When the transaction code is updated we should end the
11960                  * transaction, but for now progs only knows about commit so
11961                  * just return an error.
11962                  */
11963                 return ret;
11964         }
11965
11966         /*
11967          * Now we have all the in-memory block groups setup so we can make
11968          * allocations properly, and the metadata we care about is safe since we
11969          * pinned all of it above.
11970          */
11971         while (1) {
11972                 struct btrfs_block_group_cache *cache;
11973
11974                 cache = btrfs_lookup_first_block_group(fs_info, start);
11975                 if (!cache)
11976                         break;
11977                 start = cache->key.objectid + cache->key.offset;
11978                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11979                                         &cache->key, &cache->item,
11980                                         sizeof(cache->item));
11981                 if (ret) {
11982                         fprintf(stderr, "Error adding block group\n");
11983                         return ret;
11984                 }
11985                 btrfs_extent_post_op(trans, fs_info->extent_root);
11986         }
11987
11988         ret = reset_balance(trans, fs_info);
11989         if (ret)
11990                 fprintf(stderr, "error resetting the pending balance\n");
11991
11992         return ret;
11993 }
11994
11995 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11996 {
11997         struct btrfs_path path;
11998         struct btrfs_trans_handle *trans;
11999         struct btrfs_key key;
12000         int ret;
12001
12002         printf("Recowing metadata block %llu\n", eb->start);
12003         key.objectid = btrfs_header_owner(eb);
12004         key.type = BTRFS_ROOT_ITEM_KEY;
12005         key.offset = (u64)-1;
12006
12007         root = btrfs_read_fs_root(root->fs_info, &key);
12008         if (IS_ERR(root)) {
12009                 fprintf(stderr, "Couldn't find owner root %llu\n",
12010                         key.objectid);
12011                 return PTR_ERR(root);
12012         }
12013
12014         trans = btrfs_start_transaction(root, 1);
12015         if (IS_ERR(trans))
12016                 return PTR_ERR(trans);
12017
12018         btrfs_init_path(&path);
12019         path.lowest_level = btrfs_header_level(eb);
12020         if (path.lowest_level)
12021                 btrfs_node_key_to_cpu(eb, &key, 0);
12022         else
12023                 btrfs_item_key_to_cpu(eb, &key, 0);
12024
12025         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12026         btrfs_commit_transaction(trans, root);
12027         btrfs_release_path(&path);
12028         return ret;
12029 }
12030
12031 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12032 {
12033         struct btrfs_path path;
12034         struct btrfs_trans_handle *trans;
12035         struct btrfs_key key;
12036         int ret;
12037
12038         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12039                bad->key.type, bad->key.offset);
12040         key.objectid = bad->root_id;
12041         key.type = BTRFS_ROOT_ITEM_KEY;
12042         key.offset = (u64)-1;
12043
12044         root = btrfs_read_fs_root(root->fs_info, &key);
12045         if (IS_ERR(root)) {
12046                 fprintf(stderr, "Couldn't find owner root %llu\n",
12047                         key.objectid);
12048                 return PTR_ERR(root);
12049         }
12050
12051         trans = btrfs_start_transaction(root, 1);
12052         if (IS_ERR(trans))
12053                 return PTR_ERR(trans);
12054
12055         btrfs_init_path(&path);
12056         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12057         if (ret) {
12058                 if (ret > 0)
12059                         ret = 0;
12060                 goto out;
12061         }
12062         ret = btrfs_del_item(trans, root, &path);
12063 out:
12064         btrfs_commit_transaction(trans, root);
12065         btrfs_release_path(&path);
12066         return ret;
12067 }
12068
12069 static int zero_log_tree(struct btrfs_root *root)
12070 {
12071         struct btrfs_trans_handle *trans;
12072         int ret;
12073
12074         trans = btrfs_start_transaction(root, 1);
12075         if (IS_ERR(trans)) {
12076                 ret = PTR_ERR(trans);
12077                 return ret;
12078         }
12079         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12080         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12081         ret = btrfs_commit_transaction(trans, root);
12082         return ret;
12083 }
12084
12085 static int populate_csum(struct btrfs_trans_handle *trans,
12086                          struct btrfs_root *csum_root, char *buf, u64 start,
12087                          u64 len)
12088 {
12089         u64 offset = 0;
12090         u64 sectorsize;
12091         int ret = 0;
12092
12093         while (offset < len) {
12094                 sectorsize = csum_root->fs_info->sectorsize;
12095                 ret = read_extent_data(csum_root, buf, start + offset,
12096                                        &sectorsize, 0);
12097                 if (ret)
12098                         break;
12099                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12100                                             start + offset, buf, sectorsize);
12101                 if (ret)
12102                         break;
12103                 offset += sectorsize;
12104         }
12105         return ret;
12106 }
12107
12108 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12109                                       struct btrfs_root *csum_root,
12110                                       struct btrfs_root *cur_root)
12111 {
12112         struct btrfs_path path;
12113         struct btrfs_key key;
12114         struct extent_buffer *node;
12115         struct btrfs_file_extent_item *fi;
12116         char *buf = NULL;
12117         u64 start = 0;
12118         u64 len = 0;
12119         int slot = 0;
12120         int ret = 0;
12121
12122         buf = malloc(cur_root->fs_info->sectorsize);
12123         if (!buf)
12124                 return -ENOMEM;
12125
12126         btrfs_init_path(&path);
12127         key.objectid = 0;
12128         key.offset = 0;
12129         key.type = 0;
12130         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12131         if (ret < 0)
12132                 goto out;
12133         /* Iterate all regular file extents and fill its csum */
12134         while (1) {
12135                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12136
12137                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12138                         goto next;
12139                 node = path.nodes[0];
12140                 slot = path.slots[0];
12141                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12142                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12143                         goto next;
12144                 start = btrfs_file_extent_disk_bytenr(node, fi);
12145                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12146
12147                 ret = populate_csum(trans, csum_root, buf, start, len);
12148                 if (ret == -EEXIST)
12149                         ret = 0;
12150                 if (ret < 0)
12151                         goto out;
12152 next:
12153                 /*
12154                  * TODO: if next leaf is corrupted, jump to nearest next valid
12155                  * leaf.
12156                  */
12157                 ret = btrfs_next_item(cur_root, &path);
12158                 if (ret < 0)
12159                         goto out;
12160                 if (ret > 0) {
12161                         ret = 0;
12162                         goto out;
12163                 }
12164         }
12165
12166 out:
12167         btrfs_release_path(&path);
12168         free(buf);
12169         return ret;
12170 }
12171
12172 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12173                                   struct btrfs_root *csum_root)
12174 {
12175         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12176         struct btrfs_path path;
12177         struct btrfs_root *tree_root = fs_info->tree_root;
12178         struct btrfs_root *cur_root;
12179         struct extent_buffer *node;
12180         struct btrfs_key key;
12181         int slot = 0;
12182         int ret = 0;
12183
12184         btrfs_init_path(&path);
12185         key.objectid = BTRFS_FS_TREE_OBJECTID;
12186         key.offset = 0;
12187         key.type = BTRFS_ROOT_ITEM_KEY;
12188         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12189         if (ret < 0)
12190                 goto out;
12191         if (ret > 0) {
12192                 ret = -ENOENT;
12193                 goto out;
12194         }
12195
12196         while (1) {
12197                 node = path.nodes[0];
12198                 slot = path.slots[0];
12199                 btrfs_item_key_to_cpu(node, &key, slot);
12200                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12201                         goto out;
12202                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12203                         goto next;
12204                 if (!is_fstree(key.objectid))
12205                         goto next;
12206                 key.offset = (u64)-1;
12207
12208                 cur_root = btrfs_read_fs_root(fs_info, &key);
12209                 if (IS_ERR(cur_root) || !cur_root) {
12210                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12211                                 key.objectid);
12212                         goto out;
12213                 }
12214                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12215                                 cur_root);
12216                 if (ret < 0)
12217                         goto out;
12218 next:
12219                 ret = btrfs_next_item(tree_root, &path);
12220                 if (ret > 0) {
12221                         ret = 0;
12222                         goto out;
12223                 }
12224                 if (ret < 0)
12225                         goto out;
12226         }
12227
12228 out:
12229         btrfs_release_path(&path);
12230         return ret;
12231 }
12232
12233 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12234                                       struct btrfs_root *csum_root)
12235 {
12236         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12237         struct btrfs_path path;
12238         struct btrfs_extent_item *ei;
12239         struct extent_buffer *leaf;
12240         char *buf;
12241         struct btrfs_key key;
12242         int ret;
12243
12244         btrfs_init_path(&path);
12245         key.objectid = 0;
12246         key.type = BTRFS_EXTENT_ITEM_KEY;
12247         key.offset = 0;
12248         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12249         if (ret < 0) {
12250                 btrfs_release_path(&path);
12251                 return ret;
12252         }
12253
12254         buf = malloc(csum_root->fs_info->sectorsize);
12255         if (!buf) {
12256                 btrfs_release_path(&path);
12257                 return -ENOMEM;
12258         }
12259
12260         while (1) {
12261                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12262                         ret = btrfs_next_leaf(extent_root, &path);
12263                         if (ret < 0)
12264                                 break;
12265                         if (ret) {
12266                                 ret = 0;
12267                                 break;
12268                         }
12269                 }
12270                 leaf = path.nodes[0];
12271
12272                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12273                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12274                         path.slots[0]++;
12275                         continue;
12276                 }
12277
12278                 ei = btrfs_item_ptr(leaf, path.slots[0],
12279                                     struct btrfs_extent_item);
12280                 if (!(btrfs_extent_flags(leaf, ei) &
12281                       BTRFS_EXTENT_FLAG_DATA)) {
12282                         path.slots[0]++;
12283                         continue;
12284                 }
12285
12286                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12287                                     key.offset);
12288                 if (ret)
12289                         break;
12290                 path.slots[0]++;
12291         }
12292
12293         btrfs_release_path(&path);
12294         free(buf);
12295         return ret;
12296 }
12297
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source.  When @search_fs_tree is non-zero
 * the fs/subvolume trees are walked instead; otherwise the extent tree is
 * used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
12314
12315 static void free_roots_info_cache(void)
12316 {
12317         if (!roots_info_cache)
12318                 return;
12319
12320         while (!cache_tree_empty(roots_info_cache)) {
12321                 struct cache_extent *entry;
12322                 struct root_item_info *rii;
12323
12324                 entry = first_cache_extent(roots_info_cache);
12325                 if (!entry)
12326                         break;
12327                 remove_cache_extent(roots_info_cache, entry);
12328                 rii = container_of(entry, struct root_item_info, cache_extent);
12329                 free(rii);
12330         }
12331
12332         free(roots_info_cache);
12333         roots_info_cache = NULL;
12334 }
12335
12336 static int build_roots_info_cache(struct btrfs_fs_info *info)
12337 {
12338         int ret = 0;
12339         struct btrfs_key key;
12340         struct extent_buffer *leaf;
12341         struct btrfs_path path;
12342
12343         if (!roots_info_cache) {
12344                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12345                 if (!roots_info_cache)
12346                         return -ENOMEM;
12347                 cache_tree_init(roots_info_cache);
12348         }
12349
12350         btrfs_init_path(&path);
12351         key.objectid = 0;
12352         key.type = BTRFS_EXTENT_ITEM_KEY;
12353         key.offset = 0;
12354         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12355         if (ret < 0)
12356                 goto out;
12357         leaf = path.nodes[0];
12358
12359         while (1) {
12360                 struct btrfs_key found_key;
12361                 struct btrfs_extent_item *ei;
12362                 struct btrfs_extent_inline_ref *iref;
12363                 int slot = path.slots[0];
12364                 int type;
12365                 u64 flags;
12366                 u64 root_id;
12367                 u8 level;
12368                 struct cache_extent *entry;
12369                 struct root_item_info *rii;
12370
12371                 if (slot >= btrfs_header_nritems(leaf)) {
12372                         ret = btrfs_next_leaf(info->extent_root, &path);
12373                         if (ret < 0) {
12374                                 break;
12375                         } else if (ret) {
12376                                 ret = 0;
12377                                 break;
12378                         }
12379                         leaf = path.nodes[0];
12380                         slot = path.slots[0];
12381                 }
12382
12383                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12384
12385                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12386                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12387                         goto next;
12388
12389                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12390                 flags = btrfs_extent_flags(leaf, ei);
12391
12392                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12393                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12394                         goto next;
12395
12396                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12397                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12398                         level = found_key.offset;
12399                 } else {
12400                         struct btrfs_tree_block_info *binfo;
12401
12402                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12403                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12404                         level = btrfs_tree_block_level(leaf, binfo);
12405                 }
12406
12407                 /*
12408                  * For a root extent, it must be of the following type and the
12409                  * first (and only one) iref in the item.
12410                  */
12411                 type = btrfs_extent_inline_ref_type(leaf, iref);
12412                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12413                         goto next;
12414
12415                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12416                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12417                 if (!entry) {
12418                         rii = malloc(sizeof(struct root_item_info));
12419                         if (!rii) {
12420                                 ret = -ENOMEM;
12421                                 goto out;
12422                         }
12423                         rii->cache_extent.start = root_id;
12424                         rii->cache_extent.size = 1;
12425                         rii->level = (u8)-1;
12426                         entry = &rii->cache_extent;
12427                         ret = insert_cache_extent(roots_info_cache, entry);
12428                         ASSERT(ret == 0);
12429                 } else {
12430                         rii = container_of(entry, struct root_item_info,
12431                                            cache_extent);
12432                 }
12433
12434                 ASSERT(rii->cache_extent.start == root_id);
12435                 ASSERT(rii->cache_extent.size == 1);
12436
12437                 if (level > rii->level || rii->level == (u8)-1) {
12438                         rii->level = level;
12439                         rii->bytenr = found_key.objectid;
12440                         rii->gen = btrfs_extent_generation(leaf, ei);
12441                         rii->node_count = 1;
12442                 } else if (level == rii->level) {
12443                         rii->node_count++;
12444                 }
12445 next:
12446                 path.slots[0]++;
12447         }
12448
12449 out:
12450         btrfs_release_path(&path);
12451
12452         return ret;
12453 }
12454
12455 static int maybe_repair_root_item(struct btrfs_path *path,
12456                                   const struct btrfs_key *root_key,
12457                                   const int read_only_mode)
12458 {
12459         const u64 root_id = root_key->objectid;
12460         struct cache_extent *entry;
12461         struct root_item_info *rii;
12462         struct btrfs_root_item ri;
12463         unsigned long offset;
12464
12465         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12466         if (!entry) {
12467                 fprintf(stderr,
12468                         "Error: could not find extent items for root %llu\n",
12469                         root_key->objectid);
12470                 return -ENOENT;
12471         }
12472
12473         rii = container_of(entry, struct root_item_info, cache_extent);
12474         ASSERT(rii->cache_extent.start == root_id);
12475         ASSERT(rii->cache_extent.size == 1);
12476
12477         if (rii->node_count != 1) {
12478                 fprintf(stderr,
12479                         "Error: could not find btree root extent for root %llu\n",
12480                         root_id);
12481                 return -ENOENT;
12482         }
12483
12484         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12485         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12486
12487         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12488             btrfs_root_level(&ri) != rii->level ||
12489             btrfs_root_generation(&ri) != rii->gen) {
12490
12491                 /*
12492                  * If we're in repair mode but our caller told us to not update
12493                  * the root item, i.e. just check if it needs to be updated, don't
12494                  * print this message, since the caller will call us again shortly
12495                  * for the same root item without read only mode (the caller will
12496                  * open a transaction first).
12497                  */
12498                 if (!(read_only_mode && repair))
12499                         fprintf(stderr,
12500                                 "%sroot item for root %llu,"
12501                                 " current bytenr %llu, current gen %llu, current level %u,"
12502                                 " new bytenr %llu, new gen %llu, new level %u\n",
12503                                 (read_only_mode ? "" : "fixing "),
12504                                 root_id,
12505                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12506                                 btrfs_root_level(&ri),
12507                                 rii->bytenr, rii->gen, rii->level);
12508
12509                 if (btrfs_root_generation(&ri) > rii->gen) {
12510                         fprintf(stderr,
12511                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12512                                 root_id, btrfs_root_generation(&ri), rii->gen);
12513                         return -EINVAL;
12514                 }
12515
12516                 if (!read_only_mode) {
12517                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12518                         btrfs_set_root_level(&ri, rii->level);
12519                         btrfs_set_root_generation(&ri, rii->gen);
12520                         write_extent_buffer(path->nodes[0], &ri,
12521                                             offset, sizeof(ri));
12522                 }
12523
12524                 return 1;
12525         }
12526
12527         return 0;
12528 }
12529
12530 /*
12531  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12532  * caused read-only snapshots to be corrupted if they were created at a moment
12533  * when the source subvolume/snapshot had orphan items. The issue was that the
12534  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12535  * node instead of the post orphan cleanup root node.
12536  * So this function, and its callees, just detects and fixes those cases. Even
12537  * though the regression was for read-only snapshots, this function applies to
12538  * any snapshot/subvolume root.
12539  * This must be run before any other repair code - not doing it so, makes other
12540  * repair code delete or modify backrefs in the extent tree for example, which
12541  * will result in an inconsistent fs after repairing the root items.
12542  */
12543 static int repair_root_items(struct btrfs_fs_info *info)
12544 {
12545         struct btrfs_path path;
12546         struct btrfs_key key;
12547         struct extent_buffer *leaf;
12548         struct btrfs_trans_handle *trans = NULL;
12549         int ret = 0;
12550         int bad_roots = 0;
12551         int need_trans = 0;
12552
12553         btrfs_init_path(&path);
12554
12555         ret = build_roots_info_cache(info);
12556         if (ret)
12557                 goto out;
12558
12559         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12560         key.type = BTRFS_ROOT_ITEM_KEY;
12561         key.offset = 0;
12562
12563 again:
12564         /*
12565          * Avoid opening and committing transactions if a leaf doesn't have
12566          * any root items that need to be fixed, so that we avoid rotating
12567          * backup roots unnecessarily.
12568          */
12569         if (need_trans) {
12570                 trans = btrfs_start_transaction(info->tree_root, 1);
12571                 if (IS_ERR(trans)) {
12572                         ret = PTR_ERR(trans);
12573                         goto out;
12574                 }
12575         }
12576
12577         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12578                                 0, trans ? 1 : 0);
12579         if (ret < 0)
12580                 goto out;
12581         leaf = path.nodes[0];
12582
12583         while (1) {
12584                 struct btrfs_key found_key;
12585
12586                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12587                         int no_more_keys = find_next_key(&path, &key);
12588
12589                         btrfs_release_path(&path);
12590                         if (trans) {
12591                                 ret = btrfs_commit_transaction(trans,
12592                                                                info->tree_root);
12593                                 trans = NULL;
12594                                 if (ret < 0)
12595                                         goto out;
12596                         }
12597                         need_trans = 0;
12598                         if (no_more_keys)
12599                                 break;
12600                         goto again;
12601                 }
12602
12603                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12604
12605                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12606                         goto next;
12607                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12608                         goto next;
12609
12610                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12611                 if (ret < 0)
12612                         goto out;
12613                 if (ret) {
12614                         if (!trans && repair) {
12615                                 need_trans = 1;
12616                                 key = found_key;
12617                                 btrfs_release_path(&path);
12618                                 goto again;
12619                         }
12620                         bad_roots++;
12621                 }
12622 next:
12623                 path.slots[0]++;
12624         }
12625         ret = 0;
12626 out:
12627         free_roots_info_cache();
12628         btrfs_release_path(&path);
12629         if (trans)
12630                 btrfs_commit_transaction(trans, info->tree_root);
12631         if (ret < 0)
12632                 return ret;
12633
12634         return bad_roots;
12635 }
12636
12637 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12638 {
12639         struct btrfs_trans_handle *trans;
12640         struct btrfs_block_group_cache *bg_cache;
12641         u64 current = 0;
12642         int ret = 0;
12643
12644         /* Clear all free space cache inodes and its extent data */
12645         while (1) {
12646                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12647                 if (!bg_cache)
12648                         break;
12649                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12650                 if (ret < 0)
12651                         return ret;
12652                 current = bg_cache->key.objectid + bg_cache->key.offset;
12653         }
12654
12655         /* Don't forget to set cache_generation to -1 */
12656         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12657         if (IS_ERR(trans)) {
12658                 error("failed to update super block cache generation");
12659                 return PTR_ERR(trans);
12660         }
12661         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12662         btrfs_commit_transaction(trans, fs_info->tree_root);
12663
12664         return ret;
12665 }
12666
/*
 * Help text for the "btrfs check" command, stored as a NULL-terminated array
 * of lines.
 * NOTE(review): the first entry looks like the synopsis and the second like
 * the one-line summary, with the long description and option list following;
 * confirm against the help-printing code.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        /* Terminating NULL entry */
        NULL
};
12697
12698 int cmd_check(int argc, char **argv)
12699 {
12700         struct cache_tree root_cache;
12701         struct btrfs_root *root;
12702         struct btrfs_fs_info *info;
12703         u64 bytenr = 0;
12704         u64 subvolid = 0;
12705         u64 tree_root_bytenr = 0;
12706         u64 chunk_root_bytenr = 0;
12707         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12708         int ret;
12709         int err = 0;
12710         u64 num;
12711         int init_csum_tree = 0;
12712         int readonly = 0;
12713         int clear_space_cache = 0;
12714         int qgroup_report = 0;
12715         int qgroups_repaired = 0;
12716         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12717
12718         while(1) {
12719                 int c;
12720                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12721                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12722                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12723                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12724                 static const struct option long_options[] = {
12725                         { "super", required_argument, NULL, 's' },
12726                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12727                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12728                         { "init-csum-tree", no_argument, NULL,
12729                                 GETOPT_VAL_INIT_CSUM },
12730                         { "init-extent-tree", no_argument, NULL,
12731                                 GETOPT_VAL_INIT_EXTENT },
12732                         { "check-data-csum", no_argument, NULL,
12733                                 GETOPT_VAL_CHECK_CSUM },
12734                         { "backup", no_argument, NULL, 'b' },
12735                         { "subvol-extents", required_argument, NULL, 'E' },
12736                         { "qgroup-report", no_argument, NULL, 'Q' },
12737                         { "tree-root", required_argument, NULL, 'r' },
12738                         { "chunk-root", required_argument, NULL,
12739                                 GETOPT_VAL_CHUNK_TREE },
12740                         { "progress", no_argument, NULL, 'p' },
12741                         { "mode", required_argument, NULL,
12742                                 GETOPT_VAL_MODE },
12743                         { "clear-space-cache", required_argument, NULL,
12744                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12745                         { NULL, 0, NULL, 0}
12746                 };
12747
12748                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12749                 if (c < 0)
12750                         break;
12751                 switch(c) {
12752                         case 'a': /* ignored */ break;
12753                         case 'b':
12754                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12755                                 break;
12756                         case 's':
12757                                 num = arg_strtou64(optarg);
12758                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12759                                         error(
12760                                         "super mirror should be less than %d",
12761                                                 BTRFS_SUPER_MIRROR_MAX);
12762                                         exit(1);
12763                                 }
12764                                 bytenr = btrfs_sb_offset(((int)num));
12765                                 printf("using SB copy %llu, bytenr %llu\n", num,
12766                                        (unsigned long long)bytenr);
12767                                 break;
12768                         case 'Q':
12769                                 qgroup_report = 1;
12770                                 break;
12771                         case 'E':
12772                                 subvolid = arg_strtou64(optarg);
12773                                 break;
12774                         case 'r':
12775                                 tree_root_bytenr = arg_strtou64(optarg);
12776                                 break;
12777                         case GETOPT_VAL_CHUNK_TREE:
12778                                 chunk_root_bytenr = arg_strtou64(optarg);
12779                                 break;
12780                         case 'p':
12781                                 ctx.progress_enabled = true;
12782                                 break;
12783                         case '?':
12784                         case 'h':
12785                                 usage(cmd_check_usage);
12786                         case GETOPT_VAL_REPAIR:
12787                                 printf("enabling repair mode\n");
12788                                 repair = 1;
12789                                 ctree_flags |= OPEN_CTREE_WRITES;
12790                                 break;
12791                         case GETOPT_VAL_READONLY:
12792                                 readonly = 1;
12793                                 break;
12794                         case GETOPT_VAL_INIT_CSUM:
12795                                 printf("Creating a new CRC tree\n");
12796                                 init_csum_tree = 1;
12797                                 repair = 1;
12798                                 ctree_flags |= OPEN_CTREE_WRITES;
12799                                 break;
12800                         case GETOPT_VAL_INIT_EXTENT:
12801                                 init_extent_tree = 1;
12802                                 ctree_flags |= (OPEN_CTREE_WRITES |
12803                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12804                                 repair = 1;
12805                                 break;
12806                         case GETOPT_VAL_CHECK_CSUM:
12807                                 check_data_csum = 1;
12808                                 break;
12809                         case GETOPT_VAL_MODE:
12810                                 check_mode = parse_check_mode(optarg);
12811                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12812                                         error("unknown mode: %s", optarg);
12813                                         exit(1);
12814                                 }
12815                                 break;
12816                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12817                                 if (strcmp(optarg, "v1") == 0) {
12818                                         clear_space_cache = 1;
12819                                 } else if (strcmp(optarg, "v2") == 0) {
12820                                         clear_space_cache = 2;
12821                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12822                                 } else {
12823                                         error(
12824                 "invalid argument to --clear-space-cache, must be v1 or v2");
12825                                         exit(1);
12826                                 }
12827                                 ctree_flags |= OPEN_CTREE_WRITES;
12828                                 break;
12829                 }
12830         }
12831
12832         if (check_argc_exact(argc - optind, 1))
12833                 usage(cmd_check_usage);
12834
12835         if (ctx.progress_enabled) {
12836                 ctx.tp = TASK_NOTHING;
12837                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12838         }
12839
12840         /* This check is the only reason for --readonly to exist */
12841         if (readonly && repair) {
12842                 error("repair options are not compatible with --readonly");
12843                 exit(1);
12844         }
12845
12846         /*
12847          * Not supported yet
12848          */
12849         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12850                 error("low memory mode doesn't support repair yet");
12851                 exit(1);
12852         }
12853
12854         radix_tree_init();
12855         cache_tree_init(&root_cache);
12856
12857         if((ret = check_mounted(argv[optind])) < 0) {
12858                 error("could not check mount status: %s", strerror(-ret));
12859                 err |= !!ret;
12860                 goto err_out;
12861         } else if(ret) {
12862                 error("%s is currently mounted, aborting", argv[optind]);
12863                 ret = -EBUSY;
12864                 err |= !!ret;
12865                 goto err_out;
12866         }
12867
12868         /* only allow partial opening under repair mode */
12869         if (repair)
12870                 ctree_flags |= OPEN_CTREE_PARTIAL;
12871
12872         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12873                                   chunk_root_bytenr, ctree_flags);
12874         if (!info) {
12875                 error("cannot open file system");
12876                 ret = -EIO;
12877                 err |= !!ret;
12878                 goto err_out;
12879         }
12880
12881         global_info = info;
12882         root = info->fs_root;
12883         if (clear_space_cache == 1) {
12884                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12885                         error(
12886                 "free space cache v2 detected, use --clear-space-cache v2");
12887                         ret = 1;
12888                         goto close_out;
12889                 }
12890                 printf("Clearing free space cache\n");
12891                 ret = clear_free_space_cache(info);
12892                 if (ret) {
12893                         error("failed to clear free space cache");
12894                         ret = 1;
12895                 } else {
12896                         printf("Free space cache cleared\n");
12897                 }
12898                 goto close_out;
12899         } else if (clear_space_cache == 2) {
12900                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12901                         printf("no free space cache v2 to clear\n");
12902                         ret = 0;
12903                         goto close_out;
12904                 }
12905                 printf("Clear free space cache v2\n");
12906                 ret = btrfs_clear_free_space_tree(info);
12907                 if (ret) {
12908                         error("failed to clear free space cache v2: %d", ret);
12909                         ret = 1;
12910                 } else {
12911                         printf("free space cache v2 cleared\n");
12912                 }
12913                 goto close_out;
12914         }
12915
12916         /*
12917          * repair mode will force us to commit transaction which
12918          * will make us fail to load log tree when mounting.
12919          */
12920         if (repair && btrfs_super_log_root(info->super_copy)) {
12921                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12922                 if (!ret) {
12923                         ret = 1;
12924                         err |= !!ret;
12925                         goto close_out;
12926                 }
12927                 ret = zero_log_tree(root);
12928                 err |= !!ret;
12929                 if (ret) {
12930                         error("failed to zero log tree: %d", ret);
12931                         goto close_out;
12932                 }
12933         }
12934
12935         uuid_unparse(info->super_copy->fsid, uuidbuf);
12936         if (qgroup_report) {
12937                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12938                        uuidbuf);
12939                 ret = qgroup_verify_all(info);
12940                 err |= !!ret;
12941                 if (ret == 0)
12942                         report_qgroups(1);
12943                 goto close_out;
12944         }
12945         if (subvolid) {
12946                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12947                        subvolid, argv[optind], uuidbuf);
12948                 ret = print_extent_state(info, subvolid);
12949                 err |= !!ret;
12950                 goto close_out;
12951         }
12952         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12953
12954         if (!extent_buffer_uptodate(info->tree_root->node) ||
12955             !extent_buffer_uptodate(info->dev_root->node) ||
12956             !extent_buffer_uptodate(info->chunk_root->node)) {
12957                 error("critical roots corrupted, unable to check the filesystem");
12958                 err |= !!ret;
12959                 ret = -EIO;
12960                 goto close_out;
12961         }
12962
12963         if (init_extent_tree || init_csum_tree) {
12964                 struct btrfs_trans_handle *trans;
12965
12966                 trans = btrfs_start_transaction(info->extent_root, 0);
12967                 if (IS_ERR(trans)) {
12968                         error("error starting transaction");
12969                         ret = PTR_ERR(trans);
12970                         err |= !!ret;
12971                         goto close_out;
12972                 }
12973
12974                 if (init_extent_tree) {
12975                         printf("Creating a new extent tree\n");
12976                         ret = reinit_extent_tree(trans, info);
12977                         err |= !!ret;
12978                         if (ret)
12979                                 goto close_out;
12980                 }
12981
12982                 if (init_csum_tree) {
12983                         printf("Reinitialize checksum tree\n");
12984                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12985                         if (ret) {
12986                                 error("checksum tree initialization failed: %d",
12987                                                 ret);
12988                                 ret = -EIO;
12989                                 err |= !!ret;
12990                                 goto close_out;
12991                         }
12992
12993                         ret = fill_csum_tree(trans, info->csum_root,
12994                                              init_extent_tree);
12995                         err |= !!ret;
12996                         if (ret) {
12997                                 error("checksum tree refilling failed: %d", ret);
12998                                 return -EIO;
12999                         }
13000                 }
13001                 /*
13002                  * Ok now we commit and run the normal fsck, which will add
13003                  * extent entries for all of the items it finds.
13004                  */
13005                 ret = btrfs_commit_transaction(trans, info->extent_root);
13006                 err |= !!ret;
13007                 if (ret)
13008                         goto close_out;
13009         }
13010         if (!extent_buffer_uptodate(info->extent_root->node)) {
13011                 error("critical: extent_root, unable to check the filesystem");
13012                 ret = -EIO;
13013                 err |= !!ret;
13014                 goto close_out;
13015         }
13016         if (!extent_buffer_uptodate(info->csum_root->node)) {
13017                 error("critical: csum_root, unable to check the filesystem");
13018                 ret = -EIO;
13019                 err |= !!ret;
13020                 goto close_out;
13021         }
13022
13023         if (!ctx.progress_enabled)
13024                 fprintf(stderr, "checking extents\n");
13025         if (check_mode == CHECK_MODE_LOWMEM)
13026                 ret = check_chunks_and_extents_v2(root);
13027         else
13028                 ret = check_chunks_and_extents(root);
13029         err |= !!ret;
13030         if (ret)
13031                 error(
13032                 "errors found in extent allocation tree or chunk allocation");
13033
13034         ret = repair_root_items(info);
13035         err |= !!ret;
13036         if (ret < 0) {
13037                 error("failed to repair root items: %s", strerror(-ret));
13038                 goto close_out;
13039         }
13040         if (repair) {
13041                 fprintf(stderr, "Fixed %d roots.\n", ret);
13042                 ret = 0;
13043         } else if (ret > 0) {
13044                 fprintf(stderr,
13045                        "Found %d roots with an outdated root item.\n",
13046                        ret);
13047                 fprintf(stderr,
13048                         "Please run a filesystem check with the option --repair to fix them.\n");
13049                 ret = 1;
13050                 err |= !!ret;
13051                 goto close_out;
13052         }
13053
13054         if (!ctx.progress_enabled) {
13055                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13056                         fprintf(stderr, "checking free space tree\n");
13057                 else
13058                         fprintf(stderr, "checking free space cache\n");
13059         }
13060         ret = check_space_cache(root);
13061         err |= !!ret;
13062         if (ret) {
13063                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13064                         error("errors found in free space tree");
13065                 else
13066                         error("errors found in free space cache");
13067                 goto out;
13068         }
13069
13070         /*
13071          * We used to have to have these hole extents in between our real
13072          * extents so if we don't have this flag set we need to make sure there
13073          * are no gaps in the file extents for inodes, otherwise we can just
13074          * ignore it when this happens.
13075          */
13076         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13077         if (!ctx.progress_enabled)
13078                 fprintf(stderr, "checking fs roots\n");
13079         if (check_mode == CHECK_MODE_LOWMEM)
13080                 ret = check_fs_roots_v2(root->fs_info);
13081         else
13082                 ret = check_fs_roots(root, &root_cache);
13083         err |= !!ret;
13084         if (ret) {
13085                 error("errors found in fs roots");
13086                 goto out;
13087         }
13088
13089         fprintf(stderr, "checking csums\n");
13090         ret = check_csums(root);
13091         err |= !!ret;
13092         if (ret) {
13093                 error("errors found in csum tree");
13094                 goto out;
13095         }
13096
13097         fprintf(stderr, "checking root refs\n");
13098         /* For low memory mode, check_fs_roots_v2 handles root refs */
13099         if (check_mode != CHECK_MODE_LOWMEM) {
13100                 ret = check_root_refs(root, &root_cache);
13101                 err |= !!ret;
13102                 if (ret) {
13103                         error("errors found in root refs");
13104                         goto out;
13105                 }
13106         }
13107
13108         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13109                 struct extent_buffer *eb;
13110
13111                 eb = list_first_entry(&root->fs_info->recow_ebs,
13112                                       struct extent_buffer, recow);
13113                 list_del_init(&eb->recow);
13114                 ret = recow_extent_buffer(root, eb);
13115                 err |= !!ret;
13116                 if (ret) {
13117                         error("fails to fix transid errors");
13118                         break;
13119                 }
13120         }
13121
13122         while (!list_empty(&delete_items)) {
13123                 struct bad_item *bad;
13124
13125                 bad = list_first_entry(&delete_items, struct bad_item, list);
13126                 list_del_init(&bad->list);
13127                 if (repair) {
13128                         ret = delete_bad_item(root, bad);
13129                         err |= !!ret;
13130                 }
13131                 free(bad);
13132         }
13133
13134         if (info->quota_enabled) {
13135                 fprintf(stderr, "checking quota groups\n");
13136                 ret = qgroup_verify_all(info);
13137                 err |= !!ret;
13138                 if (ret) {
13139                         error("failed to check quota groups");
13140                         goto out;
13141                 }
13142                 report_qgroups(0);
13143                 ret = repair_qgroups(info, &qgroups_repaired);
13144                 err |= !!ret;
13145                 if (err) {
13146                         error("failed to repair quota groups");
13147                         goto out;
13148                 }
13149                 ret = 0;
13150         }
13151
13152         if (!list_empty(&root->fs_info->recow_ebs)) {
13153                 error("transid errors in file system");
13154                 ret = 1;
13155                 err |= !!ret;
13156         }
13157 out:
13158         if (found_old_backref) { /*
13159                  * there was a disk format change when mixed
13160                  * backref was in testing tree. The old format
13161                  * existed about one week.
13162                  */
13163                 printf("\n * Found old mixed backref format. "
13164                        "The old format is not supported! *"
13165                        "\n * Please mount the FS in readonly mode, "
13166                        "backup data and re-format the FS. *\n\n");
13167                 err |= 1;
13168         }
13169         printf("found %llu bytes used, ",
13170                (unsigned long long)bytes_used);
13171         if (err)
13172                 printf("error(s) found\n");
13173         else
13174                 printf("no error found\n");
13175         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13176         printf("total tree bytes: %llu\n",
13177                (unsigned long long)total_btree_bytes);
13178         printf("total fs tree bytes: %llu\n",
13179                (unsigned long long)total_fs_tree_bytes);
13180         printf("total extent tree bytes: %llu\n",
13181                (unsigned long long)total_extent_tree_bytes);
13182         printf("btree space waste bytes: %llu\n",
13183                (unsigned long long)btree_space_waste);
13184         printf("file data blocks allocated: %llu\n referenced %llu\n",
13185                 (unsigned long long)data_bytes_allocated,
13186                 (unsigned long long)data_bytes_referenced);
13187
13188         free_qgroup_counts();
13189         free_root_recs_tree(&root_cache);
13190 close_out:
13191         close_ctree(root);
13192 err_out:
13193         if (ctx.progress_enabled)
13194                 task_deinit(ctx.info);
13195
13196         return err;
13197 }