f1e9a5fd0205040b5af6ab78b2fd52cfd5fcd063
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase of the check reported by the progress indicator.
 *
 * The values are used as indices into task_position_string[] in
 * print_status_check(), so the order here has to match the strings there.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /*
         * Has to be the last element: it has no status string and makes
         * print_status_check() return without printing anything.
         */
        TASK_NOTHING,
};
53
/* Context passed as the opaque argument to print_status_check(). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output was requested; not read in this part of the file */
        int progress_enabled;
        /* Phase whose status string print_status_check() prints */
        enum task_position tp;

        /* Handle given to task_period_start() and task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state of the checker.
 *
 * NOTE(review): the code that updates these variables lies outside this part
 * of the file, so the notes below are inferred from the names only - confirm
 * against the users before relying on them.
 */
/* Presumably byte/size statistics gathered while scanning */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
/* List heads; the element types are not visible from here */
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Presumably feature/option flags */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress context, zero-initialized */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementation to use, as returned by parse_check_mode().
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a string it does
 * not recognize; CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Selected mode; starts out as the default (original) mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common part of a back reference. It is embedded as the 'node' member of
 * struct data_backref and struct tree_backref; to_data_backref() and
 * to_tree_backref() convert back to the containing structure.
 */
struct extent_backref {
        /* List link (see to_extent_backref()) */
        struct list_head list;
        /* NOTE(review): presumably set when the container is a data_backref - confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Back reference record for data; wraps the common extent_backref as 'node'
 * (see to_data_backref()).
 */
struct data_backref {
        struct extent_backref node;
        /*
         * 'parent' and 'root' share storage.
         * NOTE(review): which one is valid presumably depends on
         * node.full_backref - confirm against the users.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* Distinct from the one-bit node.found_ref flag */
        u32 found_ref;
};
116
/*
 * Error bits, one bit per condition so they can be OR-ed together.
 * Bit 0 is not assigned in this group.
 * NOTE(review): no user is visible in this part of the file.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 *
 * During the extent scan the entries are kept on a per-root list; during the
 * fs tree scan they are moved to the owning inode record's list
 * (inode_record::orphan_extents).
 * NOTE(review): the per-root list lives in struct btrfs_root, which is not
 * visible here - confirm its field name.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* Printed as "inode" by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
/*
 * Back reference record for a tree block; wraps the common extent_backref as
 * 'node' (see to_tree_backref()).
 */
struct tree_backref {
        struct extent_backref node;
        /*
         * 'parent' and 'root' share storage.
         * NOTE(review): which one is valid presumably depends on
         * node.full_backref - confirm against the users.
         */
        union {
                u64 parent;
                u64 root;
        };
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not determined yet" value for
 * extent_record::flag_block_full_backref. That field is two bits wide, so it
 * can hold this third state in addition to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
170
/*
 * Per-extent bookkeeping record.
 *
 * NOTE(review): the code that fills and consumes this record is outside this
 * part of the file; the field notes are limited to what the declarations show.
 */
struct extent_record {
        /* List head; NOTE(review): presumably of extent_backref entries */
        struct list_head backrefs;
        struct list_head dups;
        /* List link (see to_extent_record()) */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
/*
 * One name referring to an inode; linked on inode_record::backrefs.
 *
 * The name is stored inline after the structure: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        /* List link (see to_inode_backref()) */
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        /* NOTE(review): presumably a mask of REF_ERR_* bits - confirm */
        int errors;
        u64 dir;
        u64 index;
        /* Length of 'name' in bytes */
        u16 namelen;
        /* Trailing variable-length storage for the name */
        char name[0];
};
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * Summary of a root item.
 * NOTE(review): producers and consumers are outside this part of the file;
 * the field meanings are presumably those of the like-named root item fields.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
232
/*
 * Reference error bits; print_ref_error() prints one short description for
 * every bit that is set.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * A hole [start, start + len) in a file, kept in an rb-tree
 * (inode_record::holes) that compare_hole() orders by 'start'.
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
252
/*
 * Everything collected about one inode.
 *
 * Records are reference counted through 'refs': get_inode_rec() clones a
 * record that has more than one reference before handing it out for
 * modification, and free_inode_rec() releases the memory when the last
 * reference is dropped.
 */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* Mask of I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        /* Inode number; also the key of the record in the inode cache */
        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* get_inode_rec() presets this to 1 for BTRFS_FREE_INO_OBJECTID */
        u32 found_link;
        u64 found_size;
        /* Initialized to (u64)-1 by get_inode_rec() */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* Reference count, see above */
        u32 refs;
};
280
/*
 * Inode error bits stored in inode_record::errors; print_inode_error() prints
 * one short description for every bit that is set.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
/*
 * One reference to a root, linked on root_record::backrefs.
 * NOTE(review): like inode_backref, the name is presumably stored inline
 * after the structure; the allocation site is not visible here.
 */
struct root_backref {
        /* List link (see to_root_backref()) */
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* NOTE(review): presumably a mask of REF_ERR_* bits - confirm */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        /* Trailing variable-length storage for the name */
        char name[0];
};
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/*
 * Per-root record, kept in a cache tree through 'cache'.
 * NOTE(review): users are outside this part of the file.
 */
struct root_record {
        /* List head; presumably of struct root_backref entries */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
324
/*
 * Cache tree entry that carries an opaque pointer. get_inode_rec() uses it to
 * map an inode number (cache.start, with cache.size 1) to its inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
329
/*
 * Entry of walk_control::shared, also referenced from walk_control::nodes[].
 * NOTE(review): users are outside this part of the file; presumably holds the
 * root/inode records gathered below one shared tree block - confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
337
/*
 * Start and size of a block.
 * NOTE(review): users are outside this part of the file.
 */
struct block_info {
        u64 start;
        u32 size;
};
342
/*
 * State of a tree walk.
 * NOTE(review): users are outside this part of the file.
 */
struct walk_control {
        /* Cache tree; presumably of struct shared_node entries */
        struct cache_tree shared;
        /* One slot per tree level */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
349
/*
 * Key of an item plus the id of the root it was found in, linkable on a list.
 * NOTE(review): users are outside this part of the file.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
355
/*
 * A (bytenr, bytes) extent candidate with counters, linkable on a list.
 * NOTE(review): users are outside this part of the file.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
363
/*
 * Information gathered about one tree root, kept in a cache tree through
 * 'cache_extent'.
 * NOTE(review): presumably the entries of roots_info_cache - confirm.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only used
 * internally for error classification. Every condition has its own bit so
 * that the values can be OR-ed together without losing information.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8) /* Chunk type does not match */
/*
 * Used to share bit 4 with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable; it now has a bit of its own.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Terminate the progress line: write a newline to stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        putchar('\n');
        fflush(stdout);

        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
/* Forward declaration; the definition is not in this part of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
/*
 * Re-evaluate @rec after new information was recorded for it and drop it
 * from @inode_cache once nothing is left to verify.
 *
 * Nothing happens until the inode item has been seen.  After that:
 *  1. fully matched, error-free backrefs are released;
 *  2. if the record is marked checked and is not in the middle of a merge,
 *     the per-type consistency errors are computed;
 *  3. if can_free_inode_rec() agrees, the record and its cache node are
 *     removed and freed.
 */
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
                                 struct inode_record *rec)
{
        struct cache_extent *cache;
        struct inode_backref *tmp, *backref;
        struct ptr_node *node;
        u8 filetype;

        if (!rec->found_inode_item)
                return;

        filetype = imode_to_type(rec->imode);
        list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
                /* Only backrefs with both dir item and dir index are judged */
                if (backref->found_dir_item && backref->found_dir_index) {
                        if (backref->filetype != filetype)
                                backref->errors |= REF_ERR_FILETYPE_UNMATCH;
                        /*
                         * A backref is released only when it is error free,
                         * its inode ref was found as well, and the inode's
                         * link count already matches the links found.
                         */
                        if (!backref->errors && backref->found_inode_ref &&
                            rec->nlink == rec->found_link) {
                                list_del(&backref->list);
                                free(backref);
                        }
                }
        }

        /* The final verdict is deferred while unchecked or being merged */
        if (!rec->checked || rec->merging)
                return;

        if (S_ISDIR(rec->imode)) {
                if (rec->found_size != rec->isize)
                        rec->errors |= I_ERR_DIR_ISIZE_WRONG;
                if (rec->found_file_extent)
                        rec->errors |= I_ERR_ODD_FILE_EXTENT;
        } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
                if (rec->found_dir_item)
                        rec->errors |= I_ERR_ODD_DIR_ITEM;
                if (rec->found_size != rec->nbytes)
                        rec->errors |= I_ERR_FILE_NBYTES_WRONG;
                /*
                 * Unless no_holes is set, a linked file must have extents
                 * reaching isize with no gap recorded before isize.
                 */
                if (rec->nlink > 0 && !no_holes &&
                    (rec->extent_end < rec->isize ||
                     first_extent_gap(&rec->holes) < rec->isize))
                        rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
        }

        if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
                /* csum presence must agree with the nodatasum flag */
                if (rec->found_csum_item && rec->nodatasum)
                        rec->errors |= I_ERR_ODD_CSUM_ITEM;
                if (rec->some_csum_missing && !rec->nodatasum)
                        rec->errors |= I_ERR_SOME_CSUM_MISSING;
        }

        BUG_ON(rec->refs != 1);
        if (can_free_inode_rec(rec)) {
                /*
                 * NOTE(review): the lookup result is not checked for NULL
                 * before container_of(); this relies on @rec always being
                 * present in @inode_cache - confirm against callers.
                 */
                cache = lookup_cache_extent(inode_cache, rec->ino, 1);
                node = container_of(cache, struct ptr_node, cache);
                BUG_ON(node->data != rec);
                remove_cache_extent(inode_cache, &node->cache);
                free(node);
                free_inode_rec(rec);
        }
}
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
/*
 * Merge the records collected in @src_node into @dst_node, first the
 * root_cache and then the inode_cache.
 *
 * One reference on @src_node is dropped.  If that was the last one, the
 * ptr_nodes themselves are moved from the source trees to the destination
 * trees; otherwise a new ptr_node pointing at the same record is inserted
 * and the record gains a reference.  A record whose key already exists in
 * the destination is merged into the existing one via merge_inode_recs().
 *
 * Afterwards @dst_node's current inode is advanced to @src_node's current
 * inode when the latter has the higher inode number (or @dst_node has no
 * current inode).
 *
 * Always returns 0; allocation and lookup failures hit BUG_ON.
 */
static int splice_shared_node(struct shared_node *src_node,
                              struct shared_node *dst_node)
{
        struct cache_extent *cache;
        struct ptr_node *node, *ins;
        struct cache_tree *src, *dst;
        struct inode_record *rec, *conflict;
        u64 current_ino = 0;
        int splice = 0;
        int ret;

        /* Last reference gone: entries can be moved instead of shared */
        if (--src_node->refs == 0)
                splice = 1;
        if (src_node->current)
                current_ino = src_node->current->ino;

        src = &src_node->root_cache;
        dst = &dst_node->root_cache;
again:
        cache = search_cache_extent(src, 0);
        while (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                /* Advance before the current entry may be removed */
                cache = next_cache_extent(cache);

                if (splice) {
                        remove_cache_extent(src, &node->cache);
                        ins = node;
                } else {
                        ins = malloc(sizeof(*ins));
                        BUG_ON(!ins);
                        ins->cache.start = node->cache.start;
                        ins->cache.size = node->cache.size;
                        ins->data = rec;
                        rec->refs++;
                }
                ret = insert_cache_extent(dst, &ins->cache);
                if (ret == -EEXIST) {
                        /*
                         * The destination already tracks this key: merge
                         * into its record, then drop the reference and the
                         * ptr_node that could not be inserted.
                         */
                        conflict = get_inode_rec(dst, rec->ino, 1);
                        BUG_ON(IS_ERR(conflict));
                        merge_inode_recs(rec, conflict, dst);
                        if (rec->checked) {
                                conflict->checked = 1;
                                /* conflict may be freed just below */
                                if (dst_node->current == conflict)
                                        dst_node->current = NULL;
                        }
                        maybe_free_inode_rec(dst, conflict);
                        free_inode_rec(rec);
                        free(ins);
                } else {
                        BUG_ON(ret);
                }
        }

        /* Second pass: same procedure for the inode caches */
        if (src == &src_node->root_cache) {
                src = &src_node->inode_cache;
                dst = &dst_node->inode_cache;
                goto again;
        }

        /* From here on dst refers to dst_node->inode_cache */
        if (current_ino > 0 && (!dst_node->current ||
            current_ino > dst_node->current->ino)) {
                if (dst_node->current) {
                        dst_node->current->checked = 1;
                        maybe_free_inode_rec(dst, dst_node->current);
                }
                dst_node->current = get_inode_rec(dst, current_ino, 1);
                BUG_ON(IS_ERR(dst_node->current));
        }
        return 0;
}
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
/*
 * Instantiate the tree-freeing helper for caches of inode records, with
 * free_inode_ptr() as the per-entry destructor.
 * NOTE(review): presumably this expands to free_inode_recs_tree(), which is
 * called in enter_shared_node() - confirm against the macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
/*
 * Per-level cache of tree block reference information, indexed by tree level.
 * Filled in by update_nodes_refs() and walk_down_tree() so that a block which
 * was already looked up does not need another extent info lookup.
 */
struct node_refs {
        /* start bytenr of the block most recently looked up at each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count obtained from btrfs_lookup_extent_info() for it */
        u64 refs[BTRFS_MAX_LEVEL];
        /* zero if the block at this level can be skipped for the current root */
        int need_check[BTRFS_MAX_LEVEL];
};
1865
/* Forward declarations: process_one_leaf_v2() below calls both functions. */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191
2192                 if (btrfs_is_leaf(next))
2193                         status = btrfs_check_leaf(root, NULL, next);
2194                 else
2195                         status = btrfs_check_node(root, NULL, next);
2196                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197                         free_extent_buffer(next);
2198                         err = -EIO;
2199                         goto out;
2200                 }
2201
2202                 *level = *level - 1;
2203                 free_extent_buffer(path->nodes[*level]);
2204                 path->nodes[*level] = next;
2205                 path->slots[*level] = 0;
2206         }
2207 out:
2208         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209         return err;
2210 }
2211
/*
 * Forward declaration of check_inode_item(); it repeats the declaration
 * placed before process_one_leaf_v2().
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
2214
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216                              int *level, struct node_refs *nrefs, int ext_ref)
2217 {
2218         enum btrfs_tree_block_status status;
2219         u64 bytenr;
2220         u64 ptr_gen;
2221         struct extent_buffer *next;
2222         struct extent_buffer *cur;
2223         u32 blocksize;
2224         int ret;
2225
2226         WARN_ON(*level < 0);
2227         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2228
2229         ret = update_nodes_refs(root, path->nodes[*level]->start,
2230                                 nrefs, *level);
2231         if (ret < 0)
2232                 return ret;
2233
2234         while (*level >= 0) {
2235                 WARN_ON(*level < 0);
2236                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237                 cur = path->nodes[*level];
2238
2239                 if (btrfs_header_level(cur) != *level)
2240                         WARN_ON(1);
2241
2242                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243                         break;
2244                 /* Don't forgot to check leaf/node validation */
2245                 if (*level == 0) {
2246                         ret = btrfs_check_leaf(root, NULL, cur);
2247                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248                                 ret = -EIO;
2249                                 break;
2250                         }
2251                         ret = process_one_leaf_v2(root, path, nrefs,
2252                                                   level, ext_ref);
2253                         break;
2254                 } else {
2255                         ret = btrfs_check_node(root, NULL, cur);
2256                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257                                 ret = -EIO;
2258                                 break;
2259                         }
2260                 }
2261                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263                 blocksize = root->nodesize;
2264
2265                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266                 if (ret)
2267                         break;
2268                 if (!nrefs->need_check[*level - 1]) {
2269                         path->slots[*level]++;
2270                         continue;
2271                 }
2272
2273                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275                         free_extent_buffer(next);
2276                         reada_walk_down(root, cur, path->slots[*level]);
2277                         next = read_tree_block(root, bytenr, blocksize,
2278                                                ptr_gen);
2279                         if (!extent_buffer_uptodate(next)) {
2280                                 struct btrfs_key node_key;
2281
2282                                 btrfs_node_key_to_cpu(path->nodes[*level],
2283                                                       &node_key,
2284                                                       path->slots[*level]);
2285                                 btrfs_add_corrupt_extent_record(root->fs_info,
2286                                                 &node_key,
2287                                                 path->nodes[*level]->start,
2288                                                 root->nodesize, *level);
2289                                 ret = -EIO;
2290                                 break;
2291                         }
2292                 }
2293
2294                 ret = check_child_node(cur, path->slots[*level], next);
2295                 if (ret < 0) 
2296                         break;
2297
2298                 if (btrfs_is_leaf(next))
2299                         status = btrfs_check_leaf(root, NULL, next);
2300                 else
2301                         status = btrfs_check_node(root, NULL, next);
2302                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303                         free_extent_buffer(next);
2304                         ret = -EIO;
2305                         break;
2306                 }
2307
2308                 *level = *level - 1;
2309                 free_extent_buffer(path->nodes[*level]);
2310                 path->nodes[*level] = next;
2311                 path->slots[*level] = 0;
2312         }
2313         return ret;
2314 }
2315
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317                         struct walk_control *wc, int *level)
2318 {
2319         int i;
2320         struct extent_buffer *leaf;
2321
2322         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323                 leaf = path->nodes[i];
2324                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325                         path->slots[i]++;
2326                         *level = i;
2327                         return 0;
2328                 } else {
2329                         free_extent_buffer(path->nodes[*level]);
2330                         path->nodes[*level] = NULL;
2331                         BUG_ON(*level > wc->active_node);
2332                         if (*level == wc->active_node)
2333                                 leave_shared_node(root, wc, *level);
2334                         *level = i + 1;
2335                 }
2336         }
2337         return 1;
2338 }
2339
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341                            int *level)
2342 {
2343         int i;
2344         struct extent_buffer *leaf;
2345
2346         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347                 leaf = path->nodes[i];
2348                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349                         path->slots[i]++;
2350                         *level = i;
2351                         return 0;
2352                 } else {
2353                         free_extent_buffer(path->nodes[*level]);
2354                         path->nodes[*level] = NULL;
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int check_root_dir(struct inode_record *rec)
2362 {
2363         struct inode_backref *backref;
2364         int ret = -1;
2365
2366         if (!rec->found_inode_item || rec->errors)
2367                 goto out;
2368         if (rec->nlink != 1 || rec->found_link != 0)
2369                 goto out;
2370         if (list_empty(&rec->backrefs))
2371                 goto out;
2372         backref = to_inode_backref(rec->backrefs.next);
2373         if (!backref->found_inode_ref)
2374                 goto out;
2375         if (backref->index != 0 || backref->namelen != 2 ||
2376             memcmp(backref->name, "..", 2))
2377                 goto out;
2378         if (backref->found_dir_index || backref->found_dir_item)
2379                 goto out;
2380         ret = 0;
2381 out:
2382         return ret;
2383 }
2384
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386                               struct btrfs_root *root, struct btrfs_path *path,
2387                               struct inode_record *rec)
2388 {
2389         struct btrfs_inode_item *ei;
2390         struct btrfs_key key;
2391         int ret;
2392
2393         key.objectid = rec->ino;
2394         key.type = BTRFS_INODE_ITEM_KEY;
2395         key.offset = (u64)-1;
2396
2397         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398         if (ret < 0)
2399                 goto out;
2400         if (ret) {
2401                 if (!path->slots[0]) {
2402                         ret = -ENOENT;
2403                         goto out;
2404                 }
2405                 path->slots[0]--;
2406                 ret = 0;
2407         }
2408         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409         if (key.objectid != rec->ino) {
2410                 ret = -ENOENT;
2411                 goto out;
2412         }
2413
2414         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415                             struct btrfs_inode_item);
2416         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417         btrfs_mark_buffer_dirty(path->nodes[0]);
2418         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420                root->root_key.objectid);
2421 out:
2422         btrfs_release_path(path);
2423         return ret;
2424 }
2425
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427                                     struct btrfs_root *root,
2428                                     struct btrfs_path *path,
2429                                     struct inode_record *rec)
2430 {
2431         int ret;
2432
2433         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434         btrfs_release_path(path);
2435         if (!ret)
2436                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437         return ret;
2438 }
2439
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441                                struct btrfs_root *root,
2442                                struct btrfs_path *path,
2443                                struct inode_record *rec)
2444 {
2445         struct btrfs_inode_item *ei;
2446         struct btrfs_key key;
2447         int ret = 0;
2448
2449         key.objectid = rec->ino;
2450         key.type = BTRFS_INODE_ITEM_KEY;
2451         key.offset = 0;
2452
2453         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454         if (ret) {
2455                 if (ret > 0)
2456                         ret = -ENOENT;
2457                 goto out;
2458         }
2459
2460         /* Since ret == 0, no need to check anything */
2461         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462                             struct btrfs_inode_item);
2463         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464         btrfs_mark_buffer_dirty(path->nodes[0]);
2465         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466         printf("reset nbytes for ino %llu root %llu\n",
2467                rec->ino, root->root_key.objectid);
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474                                  struct cache_tree *inode_cache,
2475                                  struct inode_record *rec,
2476                                  struct inode_backref *backref)
2477 {
2478         struct btrfs_path path;
2479         struct btrfs_trans_handle *trans;
2480         struct btrfs_dir_item *dir_item;
2481         struct extent_buffer *leaf;
2482         struct btrfs_key key;
2483         struct btrfs_disk_key disk_key;
2484         struct inode_record *dir_rec;
2485         unsigned long name_ptr;
2486         u32 data_size = sizeof(*dir_item) + backref->namelen;
2487         int ret;
2488
2489         trans = btrfs_start_transaction(root, 1);
2490         if (IS_ERR(trans))
2491                 return PTR_ERR(trans);
2492
2493         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494                 (unsigned long long)rec->ino);
2495
2496         btrfs_init_path(&path);
2497         key.objectid = backref->dir;
2498         key.type = BTRFS_DIR_INDEX_KEY;
2499         key.offset = backref->index;
2500         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501         BUG_ON(ret);
2502
2503         leaf = path.nodes[0];
2504         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2505
2506         disk_key.objectid = cpu_to_le64(rec->ino);
2507         disk_key.type = BTRFS_INODE_ITEM_KEY;
2508         disk_key.offset = 0;
2509
2510         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512         btrfs_set_dir_data_len(leaf, dir_item, 0);
2513         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514         name_ptr = (unsigned long)(dir_item + 1);
2515         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516         btrfs_mark_buffer_dirty(leaf);
2517         btrfs_release_path(&path);
2518         btrfs_commit_transaction(trans, root);
2519
2520         backref->found_dir_index = 1;
2521         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522         BUG_ON(IS_ERR(dir_rec));
2523         if (!dir_rec)
2524                 return 0;
2525         dir_rec->found_size += backref->namelen;
2526         if (dir_rec->found_size == dir_rec->isize &&
2527             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529         if (dir_rec->found_size != dir_rec->isize)
2530                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2531
2532         return 0;
2533 }
2534
2535 static int delete_dir_index(struct btrfs_root *root,
2536                             struct cache_tree *inode_cache,
2537                             struct inode_record *rec,
2538                             struct inode_backref *backref)
2539 {
2540         struct btrfs_trans_handle *trans;
2541         struct btrfs_dir_item *di;
2542         struct btrfs_path path;
2543         int ret = 0;
2544
2545         trans = btrfs_start_transaction(root, 1);
2546         if (IS_ERR(trans))
2547                 return PTR_ERR(trans);
2548
2549         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2550                 (unsigned long long)backref->dir,
2551                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2552                 (unsigned long long)root->objectid);
2553
2554         btrfs_init_path(&path);
2555         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2556                                     backref->name, backref->namelen,
2557                                     backref->index, -1);
2558         if (IS_ERR(di)) {
2559                 ret = PTR_ERR(di);
2560                 btrfs_release_path(&path);
2561                 btrfs_commit_transaction(trans, root);
2562                 if (ret == -ENOENT)
2563                         return 0;
2564                 return ret;
2565         }
2566
2567         if (!di)
2568                 ret = btrfs_del_item(trans, root, &path);
2569         else
2570                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2571         BUG_ON(ret);
2572         btrfs_release_path(&path);
2573         btrfs_commit_transaction(trans, root);
2574         return ret;
2575 }
2576
2577 static int create_inode_item(struct btrfs_root *root,
2578                              struct inode_record *rec,
2579                              struct inode_backref *backref, int root_dir)
2580 {
2581         struct btrfs_trans_handle *trans;
2582         struct btrfs_inode_item inode_item;
2583         time_t now = time(NULL);
2584         int ret;
2585
2586         trans = btrfs_start_transaction(root, 1);
2587         if (IS_ERR(trans)) {
2588                 ret = PTR_ERR(trans);
2589                 return ret;
2590         }
2591
2592         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2593                 "be incomplete, please check permissions and content after "
2594                 "the fsck completes.\n", (unsigned long long)root->objectid,
2595                 (unsigned long long)rec->ino);
2596
2597         memset(&inode_item, 0, sizeof(inode_item));
2598         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2599         if (root_dir)
2600                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2601         else
2602                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2603         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2604         if (rec->found_dir_item) {
2605                 if (rec->found_file_extent)
2606                         fprintf(stderr, "root %llu inode %llu has both a dir "
2607                                 "item and extents, unsure if it is a dir or a "
2608                                 "regular file so setting it as a directory\n",
2609                                 (unsigned long long)root->objectid,
2610                                 (unsigned long long)rec->ino);
2611                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2612                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2613         } else if (!rec->found_dir_item) {
2614                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2615                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2616         }
2617         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2618         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2619         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2622         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2623         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2624         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2625
2626         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2627         BUG_ON(ret);
2628         btrfs_commit_transaction(trans, root);
2629         return 0;
2630 }
2631
2632 static int repair_inode_backrefs(struct btrfs_root *root,
2633                                  struct inode_record *rec,
2634                                  struct cache_tree *inode_cache,
2635                                  int delete)
2636 {
2637         struct inode_backref *tmp, *backref;
2638         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2639         int ret = 0;
2640         int repaired = 0;
2641
2642         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2643                 if (!delete && rec->ino == root_dirid) {
2644                         if (!rec->found_inode_item) {
2645                                 ret = create_inode_item(root, rec, backref, 1);
2646                                 if (ret)
2647                                         break;
2648                                 repaired++;
2649                         }
2650                 }
2651
2652                 /* Index 0 for root dir's are special, don't mess with it */
2653                 if (rec->ino == root_dirid && backref->index == 0)
2654                         continue;
2655
2656                 if (delete &&
2657                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2658                      (backref->found_dir_index && backref->found_inode_ref &&
2659                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2660                         ret = delete_dir_index(root, inode_cache, rec, backref);
2661                         if (ret)
2662                                 break;
2663                         repaired++;
2664                         list_del(&backref->list);
2665                         free(backref);
2666                 }
2667
2668                 if (!delete && !backref->found_dir_index &&
2669                     backref->found_dir_item && backref->found_inode_ref) {
2670                         ret = add_missing_dir_index(root, inode_cache, rec,
2671                                                     backref);
2672                         if (ret)
2673                                 break;
2674                         repaired++;
2675                         if (backref->found_dir_item &&
2676                             backref->found_dir_index &&
2677                             backref->found_dir_index) {
2678                                 if (!backref->errors &&
2679                                     backref->found_inode_ref) {
2680                                         list_del(&backref->list);
2681                                         free(backref);
2682                                 }
2683                         }
2684                 }
2685
2686                 if (!delete && (!backref->found_dir_index &&
2687                                 !backref->found_dir_item &&
2688                                 backref->found_inode_ref)) {
2689                         struct btrfs_trans_handle *trans;
2690                         struct btrfs_key location;
2691
2692                         ret = check_dir_conflict(root, backref->name,
2693                                                  backref->namelen,
2694                                                  backref->dir,
2695                                                  backref->index);
2696                         if (ret) {
2697                                 /*
2698                                  * let nlink fixing routine to handle it,
2699                                  * which can do it better.
2700                                  */
2701                                 ret = 0;
2702                                 break;
2703                         }
2704                         location.objectid = rec->ino;
2705                         location.type = BTRFS_INODE_ITEM_KEY;
2706                         location.offset = 0;
2707
2708                         trans = btrfs_start_transaction(root, 1);
2709                         if (IS_ERR(trans)) {
2710                                 ret = PTR_ERR(trans);
2711                                 break;
2712                         }
2713                         fprintf(stderr, "adding missing dir index/item pair "
2714                                 "for inode %llu\n",
2715                                 (unsigned long long)rec->ino);
2716                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2717                                                     backref->namelen,
2718                                                     backref->dir, &location,
2719                                                     imode_to_type(rec->imode),
2720                                                     backref->index);
2721                         BUG_ON(ret);
2722                         btrfs_commit_transaction(trans, root);
2723                         repaired++;
2724                 }
2725
2726                 if (!delete && (backref->found_inode_ref &&
2727                                 backref->found_dir_index &&
2728                                 backref->found_dir_item &&
2729                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2730                                 !rec->found_inode_item)) {
2731                         ret = create_inode_item(root, rec, backref, 0);
2732                         if (ret)
2733                                 break;
2734                         repaired++;
2735                 }
2736
2737         }
2738         return ret ? ret : repaired;
2739 }
2740
2741 /*
2742  * To determine the file type for nlink/inode_item repair
2743  *
2744  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2745  * Return -ENOENT if file type is not found.
2746  */
2747 static int find_file_type(struct inode_record *rec, u8 *type)
2748 {
2749         struct inode_backref *backref;
2750
2751         /* For inode item recovered case */
2752         if (rec->found_inode_item) {
2753                 *type = imode_to_type(rec->imode);
2754                 return 0;
2755         }
2756
2757         list_for_each_entry(backref, &rec->backrefs, list) {
2758                 if (backref->found_dir_index || backref->found_dir_item) {
2759                         *type = backref->filetype;
2760                         return 0;
2761                 }
2762         }
2763         return -ENOENT;
2764 }
2765
2766 /*
2767  * To determine the file name for nlink repair
2768  *
2769  * Return 0 if file name is found, set name and namelen.
2770  * Return -ENOENT if file name is not found.
2771  */
2772 static int find_file_name(struct inode_record *rec,
2773                           char *name, int *namelen)
2774 {
2775         struct inode_backref *backref;
2776
2777         list_for_each_entry(backref, &rec->backrefs, list) {
2778                 if (backref->found_dir_index || backref->found_dir_item ||
2779                     backref->found_inode_ref) {
2780                         memcpy(name, backref->name, backref->namelen);
2781                         *namelen = backref->namelen;
2782                         return 0;
2783                 }
2784         }
2785         return -ENOENT;
2786 }
2787
2788 /* Reset the nlink of the inode to the correct one */
2789 static int reset_nlink(struct btrfs_trans_handle *trans,
2790                        struct btrfs_root *root,
2791                        struct btrfs_path *path,
2792                        struct inode_record *rec)
2793 {
2794         struct inode_backref *backref;
2795         struct inode_backref *tmp;
2796         struct btrfs_key key;
2797         struct btrfs_inode_item *inode_item;
2798         int ret = 0;
2799
2800         /* We don't believe this either, reset it and iterate backref */
2801         rec->found_link = 0;
2802
2803         /* Remove all backref including the valid ones */
2804         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2805                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2806                                    backref->index, backref->name,
2807                                    backref->namelen, 0);
2808                 if (ret < 0)
2809                         goto out;
2810
2811                 /* remove invalid backref, so it won't be added back */
2812                 if (!(backref->found_dir_index &&
2813                       backref->found_dir_item &&
2814                       backref->found_inode_ref)) {
2815                         list_del(&backref->list);
2816                         free(backref);
2817                 } else {
2818                         rec->found_link++;
2819                 }
2820         }
2821
2822         /* Set nlink to 0 */
2823         key.objectid = rec->ino;
2824         key.type = BTRFS_INODE_ITEM_KEY;
2825         key.offset = 0;
2826         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2827         if (ret < 0)
2828                 goto out;
2829         if (ret > 0) {
2830                 ret = -ENOENT;
2831                 goto out;
2832         }
2833         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2834                                     struct btrfs_inode_item);
2835         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2836         btrfs_mark_buffer_dirty(path->nodes[0]);
2837         btrfs_release_path(path);
2838
2839         /*
2840          * Add back valid inode_ref/dir_item/dir_index,
2841          * add_link() will handle the nlink inc, so new nlink must be correct
2842          */
2843         list_for_each_entry(backref, &rec->backrefs, list) {
2844                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2845                                      backref->name, backref->namelen,
2846                                      backref->filetype, &backref->index, 1);
2847                 if (ret < 0)
2848                         goto out;
2849         }
2850 out:
2851         btrfs_release_path(path);
2852         return ret;
2853 }
2854
2855 static int get_highest_inode(struct btrfs_trans_handle *trans,
2856                                 struct btrfs_root *root,
2857                                 struct btrfs_path *path,
2858                                 u64 *highest_ino)
2859 {
2860         struct btrfs_key key, found_key;
2861         int ret;
2862
2863         btrfs_init_path(path);
2864         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2865         key.offset = -1;
2866         key.type = BTRFS_INODE_ITEM_KEY;
2867         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2868         if (ret == 1) {
2869                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2870                                 path->slots[0] - 1);
2871                 *highest_ino = found_key.objectid;
2872                 ret = 0;
2873         }
2874         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2875                 ret = -EOVERFLOW;
2876         btrfs_release_path(path);
2877         return ret;
2878 }
2879
2880 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2881                                struct btrfs_root *root,
2882                                struct btrfs_path *path,
2883                                struct inode_record *rec)
2884 {
2885         char *dir_name = "lost+found";
2886         char namebuf[BTRFS_NAME_LEN] = {0};
2887         u64 lost_found_ino;
2888         u32 mode = 0700;
2889         u8 type = 0;
2890         int namelen = 0;
2891         int name_recovered = 0;
2892         int type_recovered = 0;
2893         int ret = 0;
2894
2895         /*
2896          * Get file name and type first before these invalid inode ref
2897          * are deleted by remove_all_invalid_backref()
2898          */
2899         name_recovered = !find_file_name(rec, namebuf, &namelen);
2900         type_recovered = !find_file_type(rec, &type);
2901
2902         if (!name_recovered) {
2903                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2904                        rec->ino, rec->ino);
2905                 namelen = count_digits(rec->ino);
2906                 sprintf(namebuf, "%llu", rec->ino);
2907                 name_recovered = 1;
2908         }
2909         if (!type_recovered) {
2910                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2911                        rec->ino);
2912                 type = BTRFS_FT_REG_FILE;
2913                 type_recovered = 1;
2914         }
2915
2916         ret = reset_nlink(trans, root, path, rec);
2917         if (ret < 0) {
2918                 fprintf(stderr,
2919                         "Failed to reset nlink for inode %llu: %s\n",
2920                         rec->ino, strerror(-ret));
2921                 goto out;
2922         }
2923
2924         if (rec->found_link == 0) {
2925                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2926                 if (ret < 0)
2927                         goto out;
2928                 lost_found_ino++;
2929                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2930                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2931                                   mode);
2932                 if (ret < 0) {
2933                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2934                                 dir_name, strerror(-ret));
2935                         goto out;
2936                 }
2937                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2938                                      namebuf, namelen, type, NULL, 1);
2939                 /*
2940                  * Add ".INO" suffix several times to handle case where
2941                  * "FILENAME.INO" is already taken by another file.
2942                  */
2943                 while (ret == -EEXIST) {
2944                         /*
2945                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2946                          */
2947                         if (namelen + count_digits(rec->ino) + 1 >
2948                             BTRFS_NAME_LEN) {
2949                                 ret = -EFBIG;
2950                                 goto out;
2951                         }
2952                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2953                                  ".%llu", rec->ino);
2954                         namelen += count_digits(rec->ino) + 1;
2955                         ret = btrfs_add_link(trans, root, rec->ino,
2956                                              lost_found_ino, namebuf,
2957                                              namelen, type, NULL, 1);
2958                 }
2959                 if (ret < 0) {
2960                         fprintf(stderr,
2961                                 "Failed to link the inode %llu to %s dir: %s\n",
2962                                 rec->ino, dir_name, strerror(-ret));
2963                         goto out;
2964                 }
2965                 /*
2966                  * Just increase the found_link, don't actually add the
2967                  * backref. This will make things easier and this inode
2968                  * record will be freed after the repair is done.
2969                  * So fsck will not report problem about this inode.
2970                  */
2971                 rec->found_link++;
2972                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2973                        namelen, namebuf, dir_name);
2974         }
2975         printf("Fixed the nlink of inode %llu\n", rec->ino);
2976 out:
2977         /*
2978          * Clear the flag anyway, or we will loop forever for the same inode
2979          * as it will not be removed from the bad inode list and the dead loop
2980          * happens.
2981          */
2982         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2983         btrfs_release_path(path);
2984         return ret;
2985 }
2986
2987 /*
2988  * Check if there is any normal(reg or prealloc) file extent for given
2989  * ino.
2990  * This is used to determine the file type when neither its dir_index/item or
2991  * inode_item exists.
2992  *
2993  * This will *NOT* report error, if any error happens, just consider it does
2994  * not have any normal file extent.
2995  */
2996 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2997 {
2998         struct btrfs_path path;
2999         struct btrfs_key key;
3000         struct btrfs_key found_key;
3001         struct btrfs_file_extent_item *fi;
3002         u8 type;
3003         int ret = 0;
3004
3005         btrfs_init_path(&path);
3006         key.objectid = ino;
3007         key.type = BTRFS_EXTENT_DATA_KEY;
3008         key.offset = 0;
3009
3010         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3011         if (ret < 0) {
3012                 ret = 0;
3013                 goto out;
3014         }
3015         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3016                 ret = btrfs_next_leaf(root, &path);
3017                 if (ret) {
3018                         ret = 0;
3019                         goto out;
3020                 }
3021         }
3022         while (1) {
3023                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3024                                       path.slots[0]);
3025                 if (found_key.objectid != ino ||
3026                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3027                         break;
3028                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3029                                     struct btrfs_file_extent_item);
3030                 type = btrfs_file_extent_type(path.nodes[0], fi);
3031                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3032                         ret = 1;
3033                         goto out;
3034                 }
3035         }
3036 out:
3037         btrfs_release_path(&path);
3038         return ret;
3039 }
3040
3041 static u32 btrfs_type_to_imode(u8 type)
3042 {
3043         static u32 imode_by_btrfs_type[] = {
3044                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3045                 [BTRFS_FT_DIR]          = S_IFDIR,
3046                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3047                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3048                 [BTRFS_FT_FIFO]         = S_IFIFO,
3049                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3050                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3051         };
3052
3053         return imode_by_btrfs_type[(type)];
3054 }
3055
3056 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3057                                 struct btrfs_root *root,
3058                                 struct btrfs_path *path,
3059                                 struct inode_record *rec)
3060 {
3061         u8 filetype;
3062         u32 mode = 0700;
3063         int type_recovered = 0;
3064         int ret = 0;
3065
3066         printf("Trying to rebuild inode:%llu\n", rec->ino);
3067
3068         type_recovered = !find_file_type(rec, &filetype);
3069
3070         /*
3071          * Try to determine inode type if type not found.
3072          *
3073          * For found regular file extent, it must be FILE.
3074          * For found dir_item/index, it must be DIR.
3075          *
3076          * For undetermined one, use FILE as fallback.
3077          *
3078          * TODO:
3079          * 1. If found backref(inode_index/item is already handled) to it,
3080          *    it must be DIR.
3081          *    Need new inode-inode ref structure to allow search for that.
3082          */
3083         if (!type_recovered) {
3084                 if (rec->found_file_extent &&
3085                     find_normal_file_extent(root, rec->ino)) {
3086                         type_recovered = 1;
3087                         filetype = BTRFS_FT_REG_FILE;
3088                 } else if (rec->found_dir_item) {
3089                         type_recovered = 1;
3090                         filetype = BTRFS_FT_DIR;
3091                 } else if (!list_empty(&rec->orphan_extents)) {
3092                         type_recovered = 1;
3093                         filetype = BTRFS_FT_REG_FILE;
3094                 } else{
3095                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3096                                rec->ino);
3097                         type_recovered = 1;
3098                         filetype = BTRFS_FT_REG_FILE;
3099                 }
3100         }
3101
3102         ret = btrfs_new_inode(trans, root, rec->ino,
3103                               mode | btrfs_type_to_imode(filetype));
3104         if (ret < 0)
3105                 goto out;
3106
3107         /*
3108          * Here inode rebuild is done, we only rebuild the inode item,
3109          * don't repair the nlink(like move to lost+found).
3110          * That is the job of nlink repair.
3111          *
3112          * We just fill the record and return
3113          */
3114         rec->found_dir_item = 1;
3115         rec->imode = mode | btrfs_type_to_imode(filetype);
3116         rec->nlink = 0;
3117         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3118         /* Ensure the inode_nlinks repair function will be called */
3119         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3120 out:
3121         return ret;
3122 }
3123
3124 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3125                                       struct btrfs_root *root,
3126                                       struct btrfs_path *path,
3127                                       struct inode_record *rec)
3128 {
3129         struct orphan_data_extent *orphan;
3130         struct orphan_data_extent *tmp;
3131         int ret = 0;
3132
3133         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3134                 /*
3135                  * Check for conflicting file extents
3136                  *
3137                  * Here we don't know whether the extents is compressed or not,
3138                  * so we can only assume it not compressed nor data offset,
3139                  * and use its disk_len as extent length.
3140                  */
3141                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3142                                        orphan->offset, orphan->disk_len, 0);
3143                 btrfs_release_path(path);
3144                 if (ret < 0)
3145                         goto out;
3146                 if (!ret) {
3147                         fprintf(stderr,
3148                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3149                                 orphan->disk_bytenr, orphan->disk_len);
3150                         ret = btrfs_free_extent(trans,
3151                                         root->fs_info->extent_root,
3152                                         orphan->disk_bytenr, orphan->disk_len,
3153                                         0, root->objectid, orphan->objectid,
3154                                         orphan->offset);
3155                         if (ret < 0)
3156                                 goto out;
3157                 }
3158                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3159                                 orphan->offset, orphan->disk_bytenr,
3160                                 orphan->disk_len, orphan->disk_len);
3161                 if (ret < 0)
3162                         goto out;
3163
3164                 /* Update file size info */
3165                 rec->found_size += orphan->disk_len;
3166                 if (rec->found_size == rec->nbytes)
3167                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3168
3169                 /* Update the file extent hole info too */
3170                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3171                                            orphan->disk_len);
3172                 if (ret < 0)
3173                         goto out;
3174                 if (RB_EMPTY_ROOT(&rec->holes))
3175                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3176
3177                 list_del(&orphan->list);
3178                 free(orphan);
3179         }
3180         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3181 out:
3182         return ret;
3183 }
3184
3185 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3186                                         struct btrfs_root *root,
3187                                         struct btrfs_path *path,
3188                                         struct inode_record *rec)
3189 {
3190         struct rb_node *node;
3191         struct file_extent_hole *hole;
3192         int found = 0;
3193         int ret = 0;
3194
3195         node = rb_first(&rec->holes);
3196
3197         while (node) {
3198                 found = 1;
3199                 hole = rb_entry(node, struct file_extent_hole, node);
3200                 ret = btrfs_punch_hole(trans, root, rec->ino,
3201                                        hole->start, hole->len);
3202                 if (ret < 0)
3203                         goto out;
3204                 ret = del_file_extent_hole(&rec->holes, hole->start,
3205                                            hole->len);
3206                 if (ret < 0)
3207                         goto out;
3208                 if (RB_EMPTY_ROOT(&rec->holes))
3209                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3210                 node = rb_first(&rec->holes);
3211         }
3212         /* special case for a file losing all its file extent */
3213         if (!found) {
3214                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3215                                        round_up(rec->isize, root->sectorsize));
3216                 if (ret < 0)
3217                         goto out;
3218         }
3219         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3220                rec->ino, root->objectid);
3221 out:
3222         return ret;
3223 }
3224
3225 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3226 {
3227         struct btrfs_trans_handle *trans;
3228         struct btrfs_path path;
3229         int ret = 0;
3230
3231         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3232                              I_ERR_NO_ORPHAN_ITEM |
3233                              I_ERR_LINK_COUNT_WRONG |
3234                              I_ERR_NO_INODE_ITEM |
3235                              I_ERR_FILE_EXTENT_ORPHAN |
3236                              I_ERR_FILE_EXTENT_DISCOUNT|
3237                              I_ERR_FILE_NBYTES_WRONG)))
3238                 return rec->errors;
3239
3240         /*
3241          * For nlink repair, it may create a dir and add link, so
3242          * 2 for parent(256)'s dir_index and dir_item
3243          * 2 for lost+found dir's inode_item and inode_ref
3244          * 1 for the new inode_ref of the file
3245          * 2 for lost+found dir's dir_index and dir_item for the file
3246          */
3247         trans = btrfs_start_transaction(root, 7);
3248         if (IS_ERR(trans))
3249                 return PTR_ERR(trans);
3250
3251         btrfs_init_path(&path);
3252         if (rec->errors & I_ERR_NO_INODE_ITEM)
3253                 ret = repair_inode_no_item(trans, root, &path, rec);
3254         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3255                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3257                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3259                 ret = repair_inode_isize(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3261                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3263                 ret = repair_inode_nlinks(trans, root, &path, rec);
3264         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3265                 ret = repair_inode_nbytes(trans, root, &path, rec);
3266         btrfs_commit_transaction(trans, root);
3267         btrfs_release_path(&path);
3268         return ret;
3269 }
3270
3271 static int check_inode_recs(struct btrfs_root *root,
3272                             struct cache_tree *inode_cache)
3273 {
3274         struct cache_extent *cache;
3275         struct ptr_node *node;
3276         struct inode_record *rec;
3277         struct inode_backref *backref;
3278         int stage = 0;
3279         int ret = 0;
3280         int err = 0;
3281         u64 error = 0;
3282         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3283
3284         if (btrfs_root_refs(&root->root_item) == 0) {
3285                 if (!cache_tree_empty(inode_cache))
3286                         fprintf(stderr, "warning line %d\n", __LINE__);
3287                 return 0;
3288         }
3289
3290         /*
3291          * We need to repair backrefs first because we could change some of the
3292          * errors in the inode recs.
3293          *
3294          * We also need to go through and delete invalid backrefs first and then
3295          * add the correct ones second.  We do this because we may get EEXIST
3296          * when adding back the correct index because we hadn't yet deleted the
3297          * invalid index.
3298          *
3299          * For example, if we were missing a dir index then the directories
3300          * isize would be wrong, so if we fixed the isize to what we thought it
3301          * would be and then fixed the backref we'd still have a invalid fs, so
3302          * we need to add back the dir index and then check to see if the isize
3303          * is still wrong.
3304          */
3305         while (stage < 3) {
3306                 stage++;
3307                 if (stage == 3 && !err)
3308                         break;
3309
3310                 cache = search_cache_extent(inode_cache, 0);
3311                 while (repair && cache) {
3312                         node = container_of(cache, struct ptr_node, cache);
3313                         rec = node->data;
3314                         cache = next_cache_extent(cache);
3315
3316                         /* Need to free everything up and rescan */
3317                         if (stage == 3) {
3318                                 remove_cache_extent(inode_cache, &node->cache);
3319                                 free(node);
3320                                 free_inode_rec(rec);
3321                                 continue;
3322                         }
3323
3324                         if (list_empty(&rec->backrefs))
3325                                 continue;
3326
3327                         ret = repair_inode_backrefs(root, rec, inode_cache,
3328                                                     stage == 1);
3329                         if (ret < 0) {
3330                                 err = ret;
3331                                 stage = 2;
3332                                 break;
3333                         } if (ret > 0) {
3334                                 err = -EAGAIN;
3335                         }
3336                 }
3337         }
3338         if (err)
3339                 return err;
3340
3341         rec = get_inode_rec(inode_cache, root_dirid, 0);
3342         BUG_ON(IS_ERR(rec));
3343         if (rec) {
3344                 ret = check_root_dir(rec);
3345                 if (ret) {
3346                         fprintf(stderr, "root %llu root dir %llu error\n",
3347                                 (unsigned long long)root->root_key.objectid,
3348                                 (unsigned long long)root_dirid);
3349                         print_inode_error(root, rec);
3350                         error++;
3351                 }
3352         } else {
3353                 if (repair) {
3354                         struct btrfs_trans_handle *trans;
3355
3356                         trans = btrfs_start_transaction(root, 1);
3357                         if (IS_ERR(trans)) {
3358                                 err = PTR_ERR(trans);
3359                                 return err;
3360                         }
3361
3362                         fprintf(stderr,
3363                                 "root %llu missing its root dir, recreating\n",
3364                                 (unsigned long long)root->objectid);
3365
3366                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3367                         BUG_ON(ret);
3368
3369                         btrfs_commit_transaction(trans, root);
3370                         return -EAGAIN;
3371                 }
3372
3373                 fprintf(stderr, "root %llu root dir %llu not found\n",
3374                         (unsigned long long)root->root_key.objectid,
3375                         (unsigned long long)root_dirid);
3376         }
3377
3378         while (1) {
3379                 cache = search_cache_extent(inode_cache, 0);
3380                 if (!cache)
3381                         break;
3382                 node = container_of(cache, struct ptr_node, cache);
3383                 rec = node->data;
3384                 remove_cache_extent(inode_cache, &node->cache);
3385                 free(node);
3386                 if (rec->ino == root_dirid ||
3387                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3388                         free_inode_rec(rec);
3389                         continue;
3390                 }
3391
3392                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3393                         ret = check_orphan_item(root, rec->ino);
3394                         if (ret == 0)
3395                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3396                         if (can_free_inode_rec(rec)) {
3397                                 free_inode_rec(rec);
3398                                 continue;
3399                         }
3400                 }
3401
3402                 if (!rec->found_inode_item)
3403                         rec->errors |= I_ERR_NO_INODE_ITEM;
3404                 if (rec->found_link != rec->nlink)
3405                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3406                 if (repair) {
3407                         ret = try_repair_inode(root, rec);
3408                         if (ret == 0 && can_free_inode_rec(rec)) {
3409                                 free_inode_rec(rec);
3410                                 continue;
3411                         }
3412                         ret = 0;
3413                 }
3414
3415                 if (!(repair && ret == 0))
3416                         error++;
3417                 print_inode_error(root, rec);
3418                 list_for_each_entry(backref, &rec->backrefs, list) {
3419                         if (!backref->found_dir_item)
3420                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3421                         if (!backref->found_dir_index)
3422                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3423                         if (!backref->found_inode_ref)
3424                                 backref->errors |= REF_ERR_NO_INODE_REF;
3425                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3426                                 " namelen %u name %s filetype %d errors %x",
3427                                 (unsigned long long)backref->dir,
3428                                 (unsigned long long)backref->index,
3429                                 backref->namelen, backref->name,
3430                                 backref->filetype, backref->errors);
3431                         print_ref_error(backref->errors);
3432                 }
3433                 free_inode_rec(rec);
3434         }
3435         return (error > 0) ? -1 : 0;
3436 }
3437
3438 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3439                                         u64 objectid)
3440 {
3441         struct cache_extent *cache;
3442         struct root_record *rec = NULL;
3443         int ret;
3444
3445         cache = lookup_cache_extent(root_cache, objectid, 1);
3446         if (cache) {
3447                 rec = container_of(cache, struct root_record, cache);
3448         } else {
3449                 rec = calloc(1, sizeof(*rec));
3450                 if (!rec)
3451                         return ERR_PTR(-ENOMEM);
3452                 rec->objectid = objectid;
3453                 INIT_LIST_HEAD(&rec->backrefs);
3454                 rec->cache.start = objectid;
3455                 rec->cache.size = 1;
3456
3457                 ret = insert_cache_extent(root_cache, &rec->cache);
3458                 if (ret)
3459                         return ERR_PTR(-EEXIST);
3460         }
3461         return rec;
3462 }
3463
3464 static struct root_backref *get_root_backref(struct root_record *rec,
3465                                              u64 ref_root, u64 dir, u64 index,
3466                                              const char *name, int namelen)
3467 {
3468         struct root_backref *backref;
3469
3470         list_for_each_entry(backref, &rec->backrefs, list) {
3471                 if (backref->ref_root != ref_root || backref->dir != dir ||
3472                     backref->namelen != namelen)
3473                         continue;
3474                 if (memcmp(name, backref->name, namelen))
3475                         continue;
3476                 return backref;
3477         }
3478
3479         backref = calloc(1, sizeof(*backref) + namelen + 1);
3480         if (!backref)
3481                 return NULL;
3482         backref->ref_root = ref_root;
3483         backref->dir = dir;
3484         backref->index = index;
3485         backref->namelen = namelen;
3486         memcpy(backref->name, name, namelen);
3487         backref->name[namelen] = '\0';
3488         list_add_tail(&backref->list, &rec->backrefs);
3489         return backref;
3490 }
3491
3492 static void free_root_record(struct cache_extent *cache)
3493 {
3494         struct root_record *rec;
3495         struct root_backref *backref;
3496
3497         rec = container_of(cache, struct root_record, cache);
3498         while (!list_empty(&rec->backrefs)) {
3499                 backref = to_root_backref(rec->backrefs.next);
3500                 list_del(&backref->list);
3501                 free(backref);
3502         }
3503
3504         free(rec);
3505 }
3506
3507 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3508
3509 static int add_root_backref(struct cache_tree *root_cache,
3510                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3511                             const char *name, int namelen,
3512                             int item_type, int errors)
3513 {
3514         struct root_record *rec;
3515         struct root_backref *backref;
3516
3517         rec = get_root_rec(root_cache, root_id);
3518         BUG_ON(IS_ERR(rec));
3519         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3520         BUG_ON(!backref);
3521
3522         backref->errors |= errors;
3523
3524         if (item_type != BTRFS_DIR_ITEM_KEY) {
3525                 if (backref->found_dir_index || backref->found_back_ref ||
3526                     backref->found_forward_ref) {
3527                         if (backref->index != index)
3528                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3529                 } else {
3530                         backref->index = index;
3531                 }
3532         }
3533
3534         if (item_type == BTRFS_DIR_ITEM_KEY) {
3535                 if (backref->found_forward_ref)
3536                         rec->found_ref++;
3537                 backref->found_dir_item = 1;
3538         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3539                 backref->found_dir_index = 1;
3540         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3541                 if (backref->found_forward_ref)
3542                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3543                 else if (backref->found_dir_item)
3544                         rec->found_ref++;
3545                 backref->found_forward_ref = 1;
3546         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3547                 if (backref->found_back_ref)
3548                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3549                 backref->found_back_ref = 1;
3550         } else {
3551                 BUG_ON(1);
3552         }
3553
3554         if (backref->found_forward_ref && backref->found_dir_item)
3555                 backref->reachable = 1;
3556         return 0;
3557 }
3558
3559 static int merge_root_recs(struct btrfs_root *root,
3560                            struct cache_tree *src_cache,
3561                            struct cache_tree *dst_cache)
3562 {
3563         struct cache_extent *cache;
3564         struct ptr_node *node;
3565         struct inode_record *rec;
3566         struct inode_backref *backref;
3567         int ret = 0;
3568
3569         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3570                 free_inode_recs_tree(src_cache);
3571                 return 0;
3572         }
3573
3574         while (1) {
3575                 cache = search_cache_extent(src_cache, 0);
3576                 if (!cache)
3577                         break;
3578                 node = container_of(cache, struct ptr_node, cache);
3579                 rec = node->data;
3580                 remove_cache_extent(src_cache, &node->cache);
3581                 free(node);
3582
3583                 ret = is_child_root(root, root->objectid, rec->ino);
3584                 if (ret < 0)
3585                         break;
3586                 else if (ret == 0)
3587                         goto skip;
3588
3589                 list_for_each_entry(backref, &rec->backrefs, list) {
3590                         BUG_ON(backref->found_inode_ref);
3591                         if (backref->found_dir_item)
3592                                 add_root_backref(dst_cache, rec->ino,
3593                                         root->root_key.objectid, backref->dir,
3594                                         backref->index, backref->name,
3595                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3596                                         backref->errors);
3597                         if (backref->found_dir_index)
3598                                 add_root_backref(dst_cache, rec->ino,
3599                                         root->root_key.objectid, backref->dir,
3600                                         backref->index, backref->name,
3601                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3602                                         backref->errors);
3603                 }
3604 skip:
3605                 free_inode_rec(rec);
3606         }
3607         if (ret < 0)
3608                 return ret;
3609         return 0;
3610 }
3611
3612 static int check_root_refs(struct btrfs_root *root,
3613                            struct cache_tree *root_cache)
3614 {
3615         struct root_record *rec;
3616         struct root_record *ref_root;
3617         struct root_backref *backref;
3618         struct cache_extent *cache;
3619         int loop = 1;
3620         int ret;
3621         int error;
3622         int errors = 0;
3623
3624         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3625         BUG_ON(IS_ERR(rec));
3626         rec->found_ref = 1;
3627
3628         /* fixme: this can not detect circular references */
3629         while (loop) {
3630                 loop = 0;
3631                 cache = search_cache_extent(root_cache, 0);
3632                 while (1) {
3633                         if (!cache)
3634                                 break;
3635                         rec = container_of(cache, struct root_record, cache);
3636                         cache = next_cache_extent(cache);
3637
3638                         if (rec->found_ref == 0)
3639                                 continue;
3640
3641                         list_for_each_entry(backref, &rec->backrefs, list) {
3642                                 if (!backref->reachable)
3643                                         continue;
3644
3645                                 ref_root = get_root_rec(root_cache,
3646                                                         backref->ref_root);
3647                                 BUG_ON(IS_ERR(ref_root));
3648                                 if (ref_root->found_ref > 0)
3649                                         continue;
3650
3651                                 backref->reachable = 0;
3652                                 rec->found_ref--;
3653                                 if (rec->found_ref == 0)
3654                                         loop = 1;
3655                         }
3656                 }
3657         }
3658
3659         cache = search_cache_extent(root_cache, 0);
3660         while (1) {
3661                 if (!cache)
3662                         break;
3663                 rec = container_of(cache, struct root_record, cache);
3664                 cache = next_cache_extent(cache);
3665
3666                 if (rec->found_ref == 0 &&
3667                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3668                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3669                         ret = check_orphan_item(root->fs_info->tree_root,
3670                                                 rec->objectid);
3671                         if (ret == 0)
3672                                 continue;
3673
3674                         /*
3675                          * If we don't have a root item then we likely just have
3676                          * a dir item in a snapshot for this root but no actual
3677                          * ref key or anything so it's meaningless.
3678                          */
3679                         if (!rec->found_root_item)
3680                                 continue;
3681                         errors++;
3682                         fprintf(stderr, "fs tree %llu not referenced\n",
3683                                 (unsigned long long)rec->objectid);
3684                 }
3685
3686                 error = 0;
3687                 if (rec->found_ref > 0 && !rec->found_root_item)
3688                         error = 1;
3689                 list_for_each_entry(backref, &rec->backrefs, list) {
3690                         if (!backref->found_dir_item)
3691                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3692                         if (!backref->found_dir_index)
3693                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3694                         if (!backref->found_back_ref)
3695                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3696                         if (!backref->found_forward_ref)
3697                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3698                         if (backref->reachable && backref->errors)
3699                                 error = 1;
3700                 }
3701                 if (!error)
3702                         continue;
3703
3704                 errors++;
3705                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3706                         (unsigned long long)rec->objectid, rec->found_ref,
3707                          rec->found_root_item ? "" : "not found");
3708
3709                 list_for_each_entry(backref, &rec->backrefs, list) {
3710                         if (!backref->reachable)
3711                                 continue;
3712                         if (!backref->errors && rec->found_root_item)
3713                                 continue;
3714                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3715                                 " index %llu namelen %u name %s errors %x\n",
3716                                 (unsigned long long)backref->ref_root,
3717                                 (unsigned long long)backref->dir,
3718                                 (unsigned long long)backref->index,
3719                                 backref->namelen, backref->name,
3720                                 backref->errors);
3721                         print_ref_error(backref->errors);
3722                 }
3723         }
3724         return errors > 0 ? 1 : 0;
3725 }
3726
3727 static int process_root_ref(struct extent_buffer *eb, int slot,
3728                             struct btrfs_key *key,
3729                             struct cache_tree *root_cache)
3730 {
3731         u64 dirid;
3732         u64 index;
3733         u32 len;
3734         u32 name_len;
3735         struct btrfs_root_ref *ref;
3736         char namebuf[BTRFS_NAME_LEN];
3737         int error;
3738
3739         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3740
3741         dirid = btrfs_root_ref_dirid(eb, ref);
3742         index = btrfs_root_ref_sequence(eb, ref);
3743         name_len = btrfs_root_ref_name_len(eb, ref);
3744
3745         if (name_len <= BTRFS_NAME_LEN) {
3746                 len = name_len;
3747                 error = 0;
3748         } else {
3749                 len = BTRFS_NAME_LEN;
3750                 error = REF_ERR_NAME_TOO_LONG;
3751         }
3752         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3753
3754         if (key->type == BTRFS_ROOT_REF_KEY) {
3755                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3756                                  index, namebuf, len, key->type, error);
3757         } else {
3758                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3759                                  index, namebuf, len, key->type, error);
3760         }
3761         return 0;
3762 }
3763
3764 static void free_corrupt_block(struct cache_extent *cache)
3765 {
3766         struct btrfs_corrupt_block *corrupt;
3767
3768         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3769         free(corrupt);
3770 }
3771
3772 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3773
3774 /*
3775  * Repair the btree of the given root.
3776  *
3777  * The fix is to remove the node key in corrupt_blocks cache_tree.
3778  * and rebalance the tree.
3779  * After the fix, the btree should be writeable.
3780  */
3781 static int repair_btree(struct btrfs_root *root,
3782                         struct cache_tree *corrupt_blocks)
3783 {
3784         struct btrfs_trans_handle *trans;
3785         struct btrfs_path path;
3786         struct btrfs_corrupt_block *corrupt;
3787         struct cache_extent *cache;
3788         struct btrfs_key key;
3789         u64 offset;
3790         int level;
3791         int ret = 0;
3792
3793         if (cache_tree_empty(corrupt_blocks))
3794                 return 0;
3795
3796         trans = btrfs_start_transaction(root, 1);
3797         if (IS_ERR(trans)) {
3798                 ret = PTR_ERR(trans);
3799                 fprintf(stderr, "Error starting transaction: %s\n",
3800                         strerror(-ret));
3801                 return ret;
3802         }
3803         btrfs_init_path(&path);
3804         cache = first_cache_extent(corrupt_blocks);
3805         while (cache) {
3806                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3807                                        cache);
3808                 level = corrupt->level;
3809                 path.lowest_level = level;
3810                 key.objectid = corrupt->key.objectid;
3811                 key.type = corrupt->key.type;
3812                 key.offset = corrupt->key.offset;
3813
3814                 /*
3815                  * Here we don't want to do any tree balance, since it may
3816                  * cause a balance with corrupted brother leaf/node,
3817                  * so ins_len set to 0 here.
3818                  * Balance will be done after all corrupt node/leaf is deleted.
3819                  */
3820                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3821                 if (ret < 0)
3822                         goto out;
3823                 offset = btrfs_node_blockptr(path.nodes[level],
3824                                              path.slots[level]);
3825
3826                 /* Remove the ptr */
3827                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3828                 if (ret < 0)
3829                         goto out;
3830                 /*
3831                  * Remove the corresponding extent
3832                  * return value is not concerned.
3833                  */
3834                 btrfs_release_path(&path);
3835                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3836                                         0, root->root_key.objectid,
3837                                         level - 1, 0);
3838                 cache = next_cache_extent(cache);
3839         }
3840
3841         /* Balance the btree using btrfs_search_slot() */
3842         cache = first_cache_extent(corrupt_blocks);
3843         while (cache) {
3844                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3845                                        cache);
3846                 memcpy(&key, &corrupt->key, sizeof(key));
3847                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3848                 if (ret < 0)
3849                         goto out;
3850                 /* return will always >0 since it won't find the item */
3851                 ret = 0;
3852                 btrfs_release_path(&path);
3853                 cache = next_cache_extent(cache);
3854         }
3855 out:
3856         btrfs_commit_transaction(trans, root);
3857         btrfs_release_path(&path);
3858         return ret;
3859 }
3860
3861 static int check_fs_root(struct btrfs_root *root,
3862                          struct cache_tree *root_cache,
3863                          struct walk_control *wc)
3864 {
3865         int ret = 0;
3866         int err = 0;
3867         int wret;
3868         int level;
3869         struct btrfs_path path;
3870         struct shared_node root_node;
3871         struct root_record *rec;
3872         struct btrfs_root_item *root_item = &root->root_item;
3873         struct cache_tree corrupt_blocks;
3874         struct orphan_data_extent *orphan;
3875         struct orphan_data_extent *tmp;
3876         enum btrfs_tree_block_status status;
3877         struct node_refs nrefs;
3878
3879         /*
3880          * Reuse the corrupt_block cache tree to record corrupted tree block
3881          *
3882          * Unlike the usage in extent tree check, here we do it in a per
3883          * fs/subvol tree base.
3884          */
3885         cache_tree_init(&corrupt_blocks);
3886         root->fs_info->corrupt_blocks = &corrupt_blocks;
3887
3888         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3889                 rec = get_root_rec(root_cache, root->root_key.objectid);
3890                 BUG_ON(IS_ERR(rec));
3891                 if (btrfs_root_refs(root_item) > 0)
3892                         rec->found_root_item = 1;
3893         }
3894
3895         btrfs_init_path(&path);
3896         memset(&root_node, 0, sizeof(root_node));
3897         cache_tree_init(&root_node.root_cache);
3898         cache_tree_init(&root_node.inode_cache);
3899         memset(&nrefs, 0, sizeof(nrefs));
3900
3901         /* Move the orphan extent record to corresponding inode_record */
3902         list_for_each_entry_safe(orphan, tmp,
3903                                  &root->orphan_data_extents, list) {
3904                 struct inode_record *inode;
3905
3906                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3907                                       1);
3908                 BUG_ON(IS_ERR(inode));
3909                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3910                 list_move(&orphan->list, &inode->orphan_extents);
3911         }
3912
3913         level = btrfs_header_level(root->node);
3914         memset(wc->nodes, 0, sizeof(wc->nodes));
3915         wc->nodes[level] = &root_node;
3916         wc->active_node = level;
3917         wc->root_level = level;
3918
3919         /* We may not have checked the root block, lets do that now */
3920         if (btrfs_is_leaf(root->node))
3921                 status = btrfs_check_leaf(root, NULL, root->node);
3922         else
3923                 status = btrfs_check_node(root, NULL, root->node);
3924         if (status != BTRFS_TREE_BLOCK_CLEAN)
3925                 return -EIO;
3926
3927         if (btrfs_root_refs(root_item) > 0 ||
3928             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3929                 path.nodes[level] = root->node;
3930                 extent_buffer_get(root->node);
3931                 path.slots[level] = 0;
3932         } else {
3933                 struct btrfs_key key;
3934                 struct btrfs_disk_key found_key;
3935
3936                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3937                 level = root_item->drop_level;
3938                 path.lowest_level = level;
3939                 if (level > btrfs_header_level(root->node) ||
3940                     level >= BTRFS_MAX_LEVEL) {
3941                         error("ignoring invalid drop level: %u", level);
3942                         goto skip_walking;
3943                 }
3944                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3945                 if (wret < 0)
3946                         goto skip_walking;
3947                 btrfs_node_key(path.nodes[level], &found_key,
3948                                 path.slots[level]);
3949                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3950                                         sizeof(found_key)));
3951         }
3952
3953         while (1) {
3954                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3955                 if (wret < 0)
3956                         ret = wret;
3957                 if (wret != 0)
3958                         break;
3959
3960                 wret = walk_up_tree(root, &path, wc, &level);
3961                 if (wret < 0)
3962                         ret = wret;
3963                 if (wret != 0)
3964                         break;
3965         }
3966 skip_walking:
3967         btrfs_release_path(&path);
3968
3969         if (!cache_tree_empty(&corrupt_blocks)) {
3970                 struct cache_extent *cache;
3971                 struct btrfs_corrupt_block *corrupt;
3972
3973                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3974                        root->root_key.objectid);
3975                 cache = first_cache_extent(&corrupt_blocks);
3976                 while (cache) {
3977                         corrupt = container_of(cache,
3978                                                struct btrfs_corrupt_block,
3979                                                cache);
3980                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3981                                cache->start, corrupt->level,
3982                                corrupt->key.objectid, corrupt->key.type,
3983                                corrupt->key.offset);
3984                         cache = next_cache_extent(cache);
3985                 }
3986                 if (repair) {
3987                         printf("Try to repair the btree for root %llu\n",
3988                                root->root_key.objectid);
3989                         ret = repair_btree(root, &corrupt_blocks);
3990                         if (ret < 0)
3991                                 fprintf(stderr, "Failed to repair btree: %s\n",
3992                                         strerror(-ret));
3993                         if (!ret)
3994                                 printf("Btree for root %llu is fixed\n",
3995                                        root->root_key.objectid);
3996                 }
3997         }
3998
3999         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4000         if (err < 0)
4001                 ret = err;
4002
4003         if (root_node.current) {
4004                 root_node.current->checked = 1;
4005                 maybe_free_inode_rec(&root_node.inode_cache,
4006                                 root_node.current);
4007         }
4008
4009         err = check_inode_recs(root, &root_node.inode_cache);
4010         if (!ret)
4011                 ret = err;
4012
4013         free_corrupt_blocks_tree(&corrupt_blocks);
4014         root->fs_info->corrupt_blocks = NULL;
4015         free_orphan_data_extents(&root->orphan_data_extents);
4016         return ret;
4017 }
4018
4019 static int fs_root_objectid(u64 objectid)
4020 {
4021         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4022             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4023                 return 1;
4024         return is_fstree(objectid);
4025 }
4026
4027 static int check_fs_roots(struct btrfs_root *root,
4028                           struct cache_tree *root_cache)
4029 {
4030         struct btrfs_path path;
4031         struct btrfs_key key;
4032         struct walk_control wc;
4033         struct extent_buffer *leaf, *tree_node;
4034         struct btrfs_root *tmp_root;
4035         struct btrfs_root *tree_root = root->fs_info->tree_root;
4036         int ret;
4037         int err = 0;
4038
4039         if (ctx.progress_enabled) {
4040                 ctx.tp = TASK_FS_ROOTS;
4041                 task_start(ctx.info);
4042         }
4043
4044         /*
4045          * Just in case we made any changes to the extent tree that weren't
4046          * reflected into the free space cache yet.
4047          */
4048         if (repair)
4049                 reset_cached_block_groups(root->fs_info);
4050         memset(&wc, 0, sizeof(wc));
4051         cache_tree_init(&wc.shared);
4052         btrfs_init_path(&path);
4053
4054 again:
4055         key.offset = 0;
4056         key.objectid = 0;
4057         key.type = BTRFS_ROOT_ITEM_KEY;
4058         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4059         if (ret < 0) {
4060                 err = 1;
4061                 goto out;
4062         }
4063         tree_node = tree_root->node;
4064         while (1) {
4065                 if (tree_node != tree_root->node) {
4066                         free_root_recs_tree(root_cache);
4067                         btrfs_release_path(&path);
4068                         goto again;
4069                 }
4070                 leaf = path.nodes[0];
4071                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4072                         ret = btrfs_next_leaf(tree_root, &path);
4073                         if (ret) {
4074                                 if (ret < 0)
4075                                         err = 1;
4076                                 break;
4077                         }
4078                         leaf = path.nodes[0];
4079                 }
4080                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4081                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4082                     fs_root_objectid(key.objectid)) {
4083                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4084                                 tmp_root = btrfs_read_fs_root_no_cache(
4085                                                 root->fs_info, &key);
4086                         } else {
4087                                 key.offset = (u64)-1;
4088                                 tmp_root = btrfs_read_fs_root(
4089                                                 root->fs_info, &key);
4090                         }
4091                         if (IS_ERR(tmp_root)) {
4092                                 err = 1;
4093                                 goto next;
4094                         }
4095                         ret = check_fs_root(tmp_root, root_cache, &wc);
4096                         if (ret == -EAGAIN) {
4097                                 free_root_recs_tree(root_cache);
4098                                 btrfs_release_path(&path);
4099                                 goto again;
4100                         }
4101                         if (ret)
4102                                 err = 1;
4103                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4104                                 btrfs_free_fs_root(tmp_root);
4105                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4106                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4107                         process_root_ref(leaf, path.slots[0], &key,
4108                                          root_cache);
4109                 }
4110 next:
4111                 path.slots[0]++;
4112         }
4113 out:
4114         btrfs_release_path(&path);
4115         if (err)
4116                 free_extent_cache_tree(&wc.shared);
4117         if (!cache_tree_empty(&wc.shared))
4118                 fprintf(stderr, "warning line %d\n", __LINE__);
4119
4120         task_stop(ctx.info);
4121
4122         return err;
4123 }
4124
4125 /*
4126  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4127  * INODE_REF/INODE_EXTREF match.
4128  *
4129  * @root:       the root of the fs/file tree
4130  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4131  * @key:        the key of the DIR_ITEM/DIR_INDEX
4132  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4133  *              distinguish root_dir between normal dir/file
4134  * @name:       the name in the INODE_REF/INODE_EXTREF
4135  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4136  * @mode:       the st_mode of INODE_ITEM
4137  *
4138  * Return 0 if no error occurred.
4139  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4140  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4141  * dir/file.
4142  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4143  * not match for normal dir/file.
4144  */
4145 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4146                          struct btrfs_key *key, u64 index, char *name,
4147                          u32 namelen, u32 mode)
4148 {
4149         struct btrfs_path path;
4150         struct extent_buffer *node;
4151         struct btrfs_dir_item *di;
4152         struct btrfs_key location;
4153         char namebuf[BTRFS_NAME_LEN] = {0};
4154         u32 total;
4155         u32 cur = 0;
4156         u32 len;
4157         u32 name_len;
4158         u32 data_len;
4159         u8 filetype;
4160         int slot;
4161         int ret;
4162
4163         btrfs_init_path(&path);
4164         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4165         if (ret < 0) {
4166                 ret = DIR_ITEM_MISSING;
4167                 goto out;
4168         }
4169
4170         /* Process root dir and goto out*/
4171         if (index == 0) {
4172                 if (ret == 0) {
4173                         ret = ROOT_DIR_ERROR;
4174                         error(
4175                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4176                                 root->objectid,
4177                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4178                                         "REF" : "EXTREF",
4179                                 ref_key->objectid, ref_key->offset,
4180                                 key->type == BTRFS_DIR_ITEM_KEY ?
4181                                         "DIR_ITEM" : "DIR_INDEX");
4182                 } else {
4183                         ret = 0;
4184                 }
4185
4186                 goto out;
4187         }
4188
4189         /* Process normal file/dir */
4190         if (ret > 0) {
4191                 ret = DIR_ITEM_MISSING;
4192                 error(
4193                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4194                         root->objectid,
4195                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4196                         ref_key->objectid, ref_key->offset,
4197                         key->type == BTRFS_DIR_ITEM_KEY ?
4198                                 "DIR_ITEM" : "DIR_INDEX",
4199                         key->objectid, key->offset, namelen, name,
4200                         imode_to_type(mode));
4201                 goto out;
4202         }
4203
4204         /* Check whether inode_id/filetype/name match */
4205         node = path.nodes[0];
4206         slot = path.slots[0];
4207         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4208         total = btrfs_item_size_nr(node, slot);
4209         while (cur < total) {
4210                 ret = DIR_ITEM_MISMATCH;
4211                 name_len = btrfs_dir_name_len(node, di);
4212                 data_len = btrfs_dir_data_len(node, di);
4213
4214                 btrfs_dir_item_key_to_cpu(node, di, &location);
4215                 if (location.objectid != ref_key->objectid ||
4216                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4217                     location.offset != 0)
4218                         goto next;
4219
4220                 filetype = btrfs_dir_type(node, di);
4221                 if (imode_to_type(mode) != filetype)
4222                         goto next;
4223
4224                 if (name_len <= BTRFS_NAME_LEN) {
4225                         len = name_len;
4226                 } else {
4227                         len = BTRFS_NAME_LEN;
4228                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4229                         root->objectid,
4230                         key->type == BTRFS_DIR_ITEM_KEY ?
4231                         "DIR_ITEM" : "DIR_INDEX",
4232                         key->objectid, key->offset, name_len);
4233                 }
4234                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4235                 if (len != namelen || strncmp(namebuf, name, len))
4236                         goto next;
4237
4238                 ret = 0;
4239                 goto out;
4240 next:
4241                 len = sizeof(*di) + name_len + data_len;
4242                 di = (struct btrfs_dir_item *)((char *)di + len);
4243                 cur += len;
4244         }
4245         if (ret == DIR_ITEM_MISMATCH)
4246                 error(
4247                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4248                         root->objectid,
4249                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4250                         ref_key->objectid, ref_key->offset,
4251                         key->type == BTRFS_DIR_ITEM_KEY ?
4252                                 "DIR_ITEM" : "DIR_INDEX",
4253                         key->objectid, key->offset, namelen, name,
4254                         imode_to_type(mode));
4255 out:
4256         btrfs_release_path(&path);
4257         return ret;
4258 }
4259
4260 /*
4261  * Traverse the given INODE_REF and call find_dir_item() to find related
4262  * DIR_ITEM/DIR_INDEX.
4263  *
4264  * @root:       the root of the fs/file tree
4265  * @ref_key:    the key of the INODE_REF
4266  * @refs:       the count of INODE_REF
4267  * @mode:       the st_mode of INODE_ITEM
4268  *
4269  * Return 0 if no error occurred.
4270  */
4271 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4272                            struct extent_buffer *node, int slot, u64 *refs,
4273                            int mode)
4274 {
4275         struct btrfs_key key;
4276         struct btrfs_inode_ref *ref;
4277         char namebuf[BTRFS_NAME_LEN] = {0};
4278         u32 total;
4279         u32 cur = 0;
4280         u32 len;
4281         u32 name_len;
4282         u64 index;
4283         int ret, err = 0;
4284
4285         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4286         total = btrfs_item_size_nr(node, slot);
4287
4288 next:
4289         /* Update inode ref count */
4290         (*refs)++;
4291
4292         index = btrfs_inode_ref_index(node, ref);
4293         name_len = btrfs_inode_ref_name_len(node, ref);
4294         if (name_len <= BTRFS_NAME_LEN) {
4295                 len = name_len;
4296         } else {
4297                 len = BTRFS_NAME_LEN;
4298                 warning("root %llu INODE_REF[%llu %llu] name too long",
4299                         root->objectid, ref_key->objectid, ref_key->offset);
4300         }
4301
4302         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4303
4304         /* Check root dir ref name */
4305         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4306                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4307                       root->objectid, ref_key->objectid, ref_key->offset,
4308                       namebuf);
4309                 err |= ROOT_DIR_ERROR;
4310         }
4311
4312         /* Find related DIR_INDEX */
4313         key.objectid = ref_key->offset;
4314         key.type = BTRFS_DIR_INDEX_KEY;
4315         key.offset = index;
4316         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4317         err |= ret;
4318
4319         /* Find related dir_item */
4320         key.objectid = ref_key->offset;
4321         key.type = BTRFS_DIR_ITEM_KEY;
4322         key.offset = btrfs_name_hash(namebuf, len);
4323         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4324         err |= ret;
4325
4326         len = sizeof(*ref) + name_len;
4327         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4328         cur += len;
4329         if (cur < total)
4330                 goto next;
4331
4332         return err;
4333 }
4334
4335 /*
4336  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4337  * DIR_ITEM/DIR_INDEX.
4338  *
4339  * @root:       the root of the fs/file tree
4340  * @ref_key:    the key of the INODE_EXTREF
4341  * @refs:       the count of INODE_EXTREF
4342  * @mode:       the st_mode of INODE_ITEM
4343  *
4344  * Return 0 if no error occurred.
4345  */
4346 static int check_inode_extref(struct btrfs_root *root,
4347                               struct btrfs_key *ref_key,
4348                               struct extent_buffer *node, int slot, u64 *refs,
4349                               int mode)
4350 {
4351         struct btrfs_key key;
4352         struct btrfs_inode_extref *extref;
4353         char namebuf[BTRFS_NAME_LEN] = {0};
4354         u32 total;
4355         u32 cur = 0;
4356         u32 len;
4357         u32 name_len;
4358         u64 index;
4359         u64 parent;
4360         int ret;
4361         int err = 0;
4362
4363         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4364         total = btrfs_item_size_nr(node, slot);
4365
4366 next:
4367         /* update inode ref count */
4368         (*refs)++;
4369         name_len = btrfs_inode_extref_name_len(node, extref);
4370         index = btrfs_inode_extref_index(node, extref);
4371         parent = btrfs_inode_extref_parent(node, extref);
4372         if (name_len <= BTRFS_NAME_LEN) {
4373                 len = name_len;
4374         } else {
4375                 len = BTRFS_NAME_LEN;
4376                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4377                         root->objectid, ref_key->objectid, ref_key->offset);
4378         }
4379         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4380
4381         /* Check root dir ref name */
4382         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4383                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4384                       root->objectid, ref_key->objectid, ref_key->offset,
4385                       namebuf);
4386                 err |= ROOT_DIR_ERROR;
4387         }
4388
4389         /* find related dir_index */
4390         key.objectid = parent;
4391         key.type = BTRFS_DIR_INDEX_KEY;
4392         key.offset = index;
4393         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4394         err |= ret;
4395
4396         /* find related dir_item */
4397         key.objectid = parent;
4398         key.type = BTRFS_DIR_ITEM_KEY;
4399         key.offset = btrfs_name_hash(namebuf, len);
4400         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4401         err |= ret;
4402
4403         len = sizeof(*extref) + name_len;
4404         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4405         cur += len;
4406
4407         if (cur < total)
4408                 goto next;
4409
4410         return err;
4411 }
4412
4413 /*
4414  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4415  * DIR_ITEM/DIR_INDEX match.
4416  *
4417  * @root:       the root of the fs/file tree
4418  * @key:        the key of the INODE_REF/INODE_EXTREF
4419  * @name:       the name in the INODE_REF/INODE_EXTREF
4420  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4421  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4422  * to (u64)-1
4423  * @ext_ref:    the EXTENDED_IREF feature
4424  *
4425  * Return 0 if no error occurred.
4426  * Return >0 for error bitmap
4427  */
4428 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4429                           char *name, int namelen, u64 index,
4430                           unsigned int ext_ref)
4431 {
4432         struct btrfs_path path;
4433         struct btrfs_inode_ref *ref;
4434         struct btrfs_inode_extref *extref;
4435         struct extent_buffer *node;
4436         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4437         u32 total;
4438         u32 cur = 0;
4439         u32 len;
4440         u32 ref_namelen;
4441         u64 ref_index;
4442         u64 parent;
4443         u64 dir_id;
4444         int slot;
4445         int ret;
4446
4447         btrfs_init_path(&path);
4448         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4449         if (ret) {
4450                 ret = INODE_REF_MISSING;
4451                 goto extref;
4452         }
4453
4454         node = path.nodes[0];
4455         slot = path.slots[0];
4456
4457         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4458         total = btrfs_item_size_nr(node, slot);
4459
4460         /* Iterate all entry of INODE_REF */
4461         while (cur < total) {
4462                 ret = INODE_REF_MISSING;
4463
4464                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4465                 ref_index = btrfs_inode_ref_index(node, ref);
4466                 if (index != (u64)-1 && index != ref_index)
4467                         goto next_ref;
4468
4469                 if (ref_namelen <= BTRFS_NAME_LEN) {
4470                         len = ref_namelen;
4471                 } else {
4472                         len = BTRFS_NAME_LEN;
4473                         warning("root %llu INODE %s[%llu %llu] name too long",
4474                                 root->objectid,
4475                                 key->type == BTRFS_INODE_REF_KEY ?
4476                                         "REF" : "EXTREF",
4477                                 key->objectid, key->offset);
4478                 }
4479                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4480                                    len);
4481
4482                 if (len != namelen || strncmp(ref_namebuf, name, len))
4483                         goto next_ref;
4484
4485                 ret = 0;
4486                 goto out;
4487 next_ref:
4488                 len = sizeof(*ref) + ref_namelen;
4489                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4490                 cur += len;
4491         }
4492
4493 extref:
4494         /* Skip if not support EXTENDED_IREF feature */
4495         if (!ext_ref)
4496                 goto out;
4497
4498         btrfs_release_path(&path);
4499         btrfs_init_path(&path);
4500
4501         dir_id = key->offset;
4502         key->type = BTRFS_INODE_EXTREF_KEY;
4503         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4504
4505         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4506         if (ret) {
4507                 ret = INODE_REF_MISSING;
4508                 goto out;
4509         }
4510
4511         node = path.nodes[0];
4512         slot = path.slots[0];
4513
4514         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4515         cur = 0;
4516         total = btrfs_item_size_nr(node, slot);
4517
4518         /* Iterate all entry of INODE_EXTREF */
4519         while (cur < total) {
4520                 ret = INODE_REF_MISSING;
4521
4522                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4523                 ref_index = btrfs_inode_extref_index(node, extref);
4524                 parent = btrfs_inode_extref_parent(node, extref);
4525                 if (index != (u64)-1 && index != ref_index)
4526                         goto next_extref;
4527
4528                 if (parent != dir_id)
4529                         goto next_extref;
4530
4531                 if (ref_namelen <= BTRFS_NAME_LEN) {
4532                         len = ref_namelen;
4533                 } else {
4534                         len = BTRFS_NAME_LEN;
4535                         warning("root %llu INODE %s[%llu %llu] name too long",
4536                                 root->objectid,
4537                                 key->type == BTRFS_INODE_REF_KEY ?
4538                                         "REF" : "EXTREF",
4539                                 key->objectid, key->offset);
4540                 }
4541                 read_extent_buffer(node, ref_namebuf,
4542                                    (unsigned long)(extref + 1), len);
4543
4544                 if (len != namelen || strncmp(ref_namebuf, name, len))
4545                         goto next_extref;
4546
4547                 ret = 0;
4548                 goto out;
4549
4550 next_extref:
4551                 len = sizeof(*extref) + ref_namelen;
4552                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4553                 cur += len;
4554
4555         }
4556 out:
4557         btrfs_release_path(&path);
4558         return ret;
4559 }
4560
4561 /*
4562  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4563  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4564  *
4565  * @root:       the root of the fs/file tree
4566  * @key:        the key of the INODE_REF/INODE_EXTREF
4567  * @size:       the st_size of the INODE_ITEM
4568  * @ext_ref:    the EXTENDED_IREF feature
4569  *
4570  * Return 0 if no error occurred.
4571  */
4572 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4573                           struct extent_buffer *node, int slot, u64 *size,
4574                           unsigned int ext_ref)
4575 {
4576         struct btrfs_dir_item *di;
4577         struct btrfs_inode_item *ii;
4578         struct btrfs_path path;
4579         struct btrfs_key location;
4580         char namebuf[BTRFS_NAME_LEN] = {0};
4581         u32 total;
4582         u32 cur = 0;
4583         u32 len;
4584         u32 name_len;
4585         u32 data_len;
4586         u8 filetype;
4587         u32 mode;
4588         u64 index;
4589         int ret;
4590         int err = 0;
4591
4592         /*
4593          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4594          * ignore index check.
4595          */
4596         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4597
4598         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4599         total = btrfs_item_size_nr(node, slot);
4600
4601         while (cur < total) {
4602                 data_len = btrfs_dir_data_len(node, di);
4603                 if (data_len)
4604                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4605                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4606                               "DIR_ITEM" : "DIR_INDEX",
4607                               key->objectid, key->offset, data_len);
4608
4609                 name_len = btrfs_dir_name_len(node, di);
4610                 if (name_len <= BTRFS_NAME_LEN) {
4611                         len = name_len;
4612                 } else {
4613                         len = BTRFS_NAME_LEN;
4614                         warning("root %llu %s[%llu %llu] name too long",
4615                                 root->objectid,
4616                                 key->type == BTRFS_DIR_ITEM_KEY ?
4617                                 "DIR_ITEM" : "DIR_INDEX",
4618                                 key->objectid, key->offset);
4619                 }
4620                 (*size) += name_len;
4621
4622                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4623                 filetype = btrfs_dir_type(node, di);
4624
4625                 btrfs_init_path(&path);
4626                 btrfs_dir_item_key_to_cpu(node, di, &location);
4627
4628                 /* Ignore related ROOT_ITEM check */
4629                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4630                         goto next;
4631
4632                 /* Check relative INODE_ITEM(existence/filetype) */
4633                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4634                 if (ret) {
4635                         err |= INODE_ITEM_MISSING;
4636                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4637                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4638                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4639                               key->offset, location.objectid, name_len,
4640                               namebuf, filetype);
4641                         goto next;
4642                 }
4643
4644                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4645                                     struct btrfs_inode_item);
4646                 mode = btrfs_inode_mode(path.nodes[0], ii);
4647
4648                 if (imode_to_type(mode) != filetype) {
4649                         err |= INODE_ITEM_MISMATCH;
4650                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4651                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4652                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4653                               key->offset, name_len, namebuf, filetype);
4654                 }
4655
4656                 /* Check relative INODE_REF/INODE_EXTREF */
4657                 location.type = BTRFS_INODE_REF_KEY;
4658                 location.offset = key->objectid;
4659                 ret = find_inode_ref(root, &location, namebuf, len,
4660                                        index, ext_ref);
4661                 err |= ret;
4662                 if (ret & INODE_REF_MISSING)
4663                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4664                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4665                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4666                               key->offset, name_len, namebuf, filetype);
4667
4668 next:
4669                 btrfs_release_path(&path);
4670                 len = sizeof(*di) + name_len + data_len;
4671                 di = (struct btrfs_dir_item *)((char *)di + len);
4672                 cur += len;
4673
4674                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4675                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4676                               root->objectid, key->objectid, key->offset);
4677                         break;
4678                 }
4679         }
4680
4681         return err;
4682 }
4683
4684 /*
4685  * Check file extent datasum/hole, update the size of the file extents,
4686  * check and update the last offset of the file extent.
4687  *
4688  * @root:       the root of fs/file tree.
4689  * @fkey:       the key of the file extent.
4690  * @nodatasum:  INODE_NODATASUM feature.
4691  * @size:       the sum of all EXTENT_DATA items size for this inode.
4692  * @end:        the offset of the last extent.
4693  *
4694  * Return 0 if no error occurred.
4695  */
4696 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4697                              struct extent_buffer *node, int slot,
4698                              unsigned int nodatasum, u64 *size, u64 *end)
4699 {
4700         struct btrfs_file_extent_item *fi;
4701         u64 disk_bytenr;
4702         u64 disk_num_bytes;
4703         u64 extent_num_bytes;
4704         u64 found;
4705         unsigned int extent_type;
4706         unsigned int is_hole;
4707         int ret;
4708         int err = 0;
4709
4710         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4711
4712         extent_type = btrfs_file_extent_type(node, fi);
4713         /* Skip if file extent is inline */
4714         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4715                 struct btrfs_item *e = btrfs_item_nr(slot);
4716                 u32 item_inline_len;
4717
4718                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4719                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4720                 if (extent_num_bytes == 0 ||
4721                     extent_num_bytes != item_inline_len)
4722                         err |= FILE_EXTENT_ERROR;
4723                 *size += extent_num_bytes;
4724                 return err;
4725         }
4726
4727         /* Check extent type */
4728         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4729                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4730                 err |= FILE_EXTENT_ERROR;
4731                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4732                       root->objectid, fkey->objectid, fkey->offset);
4733                 return err;
4734         }
4735
4736         /* Check REG_EXTENT/PREALLOC_EXTENT */
4737         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4738         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4739         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4740         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4741
4742         /* Check EXTENT_DATA datasum */
4743         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4744         if (found > 0 && nodatasum) {
4745                 err |= ODD_CSUM_ITEM;
4746                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4747                       root->objectid, fkey->objectid, fkey->offset);
4748         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4749                    !is_hole &&
4750                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4751                 err |= CSUM_ITEM_MISSING;
4752                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4753                       root->objectid, fkey->objectid, fkey->offset);
4754         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4755                 err |= ODD_CSUM_ITEM;
4756                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4757                       root->objectid, fkey->objectid, fkey->offset);
4758         }
4759
4760         /* Check EXTENT_DATA hole */
4761         if (no_holes && is_hole) {
4762                 err |= FILE_EXTENT_ERROR;
4763                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4764                       root->objectid, fkey->objectid, fkey->offset);
4765         } else if (!no_holes && *end != fkey->offset) {
4766                 err |= FILE_EXTENT_ERROR;
4767                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4768                       root->objectid, fkey->objectid, fkey->offset);
4769         }
4770
4771         *end += extent_num_bytes;
4772         if (!is_hole)
4773                 *size += extent_num_bytes;
4774
4775         return err;
4776 }
4777
4778 /*
4779  * Check INODE_ITEM and related ITEMs (the same inode number)
4780  * 1. check link count
4781  * 2. check inode ref/extref
4782  * 3. check dir item/index
4783  *
4784  * @ext_ref:    the EXTENDED_IREF feature
4785  *
4786  * Return 0 if no error occurred.
4787  * Return >0 for error or hit the traversal is done(by error bitmap)
4788  */
4789 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4790                             unsigned int ext_ref)
4791 {
4792         struct extent_buffer *node;
4793         struct btrfs_inode_item *ii;
4794         struct btrfs_key key;
4795         u64 inode_id;
4796         u32 mode;
4797         u64 nlink;
4798         u64 nbytes;
4799         u64 isize;
4800         u64 size = 0;
4801         u64 refs = 0;
4802         u64 extent_end = 0;
4803         u64 extent_size = 0;
4804         unsigned int dir;
4805         unsigned int nodatasum;
4806         int slot;
4807         int ret;
4808         int err = 0;
4809
4810         node = path->nodes[0];
4811         slot = path->slots[0];
4812
4813         btrfs_item_key_to_cpu(node, &key, slot);
4814         inode_id = key.objectid;
4815
4816         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4817                 ret = btrfs_next_item(root, path);
4818                 if (ret > 0)
4819                         err |= LAST_ITEM;
4820                 return err;
4821         }
4822
4823         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4824         isize = btrfs_inode_size(node, ii);
4825         nbytes = btrfs_inode_nbytes(node, ii);
4826         mode = btrfs_inode_mode(node, ii);
4827         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4828         nlink = btrfs_inode_nlink(node, ii);
4829         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4830
4831         while (1) {
4832                 ret = btrfs_next_item(root, path);
4833                 if (ret < 0) {
4834                         /* out will fill 'err' rusing current statistics */
4835                         goto out;
4836                 } else if (ret > 0) {
4837                         err |= LAST_ITEM;
4838                         goto out;
4839                 }
4840
4841                 node = path->nodes[0];
4842                 slot = path->slots[0];
4843                 btrfs_item_key_to_cpu(node, &key, slot);
4844                 if (key.objectid != inode_id)
4845                         goto out;
4846
4847                 switch (key.type) {
4848                 case BTRFS_INODE_REF_KEY:
4849                         ret = check_inode_ref(root, &key, node, slot, &refs,
4850                                               mode);
4851                         err |= ret;
4852                         break;
4853                 case BTRFS_INODE_EXTREF_KEY:
4854                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4855                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4856                                         root->objectid, key.objectid,
4857                                         key.offset);
4858                         ret = check_inode_extref(root, &key, node, slot, &refs,
4859                                                  mode);
4860                         err |= ret;
4861                         break;
4862                 case BTRFS_DIR_ITEM_KEY:
4863                 case BTRFS_DIR_INDEX_KEY:
4864                         if (!dir) {
4865                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4866                                         root->objectid, inode_id,
4867                                         imode_to_type(mode), key.objectid,
4868                                         key.offset);
4869                         }
4870                         ret = check_dir_item(root, &key, node, slot, &size,
4871                                              ext_ref);
4872                         err |= ret;
4873                         break;
4874                 case BTRFS_EXTENT_DATA_KEY:
4875                         if (dir) {
4876                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4877                                         root->objectid, inode_id, key.objectid,
4878                                         key.offset);
4879                         }
4880                         ret = check_file_extent(root, &key, node, slot,
4881                                                 nodatasum, &extent_size,
4882                                                 &extent_end);
4883                         err |= ret;
4884                         break;
4885                 case BTRFS_XATTR_ITEM_KEY:
4886                         break;
4887                 default:
4888                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4889                               key.objectid, key.type, key.offset);
4890                 }
4891         }
4892
4893 out:
4894         /* verify INODE_ITEM nlink/isize/nbytes */
4895         if (dir) {
4896                 if (nlink != 1) {
4897                         err |= LINK_COUNT_ERROR;
4898                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4899                               root->objectid, inode_id, nlink);
4900                 }
4901
4902                 /*
4903                  * Just a warning, as dir inode nbytes is just an
4904                  * instructive value.
4905                  */
4906                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4907                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4908                                 root->objectid, inode_id, root->nodesize);
4909                 }
4910
4911                 if (isize != size) {
4912                         err |= ISIZE_ERROR;
4913                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4914                               root->objectid, inode_id, isize, size);
4915                 }
4916         } else {
4917                 if (nlink != refs) {
4918                         err |= LINK_COUNT_ERROR;
4919                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4920                               root->objectid, inode_id, nlink, refs);
4921                 } else if (!nlink) {
4922                         err |= ORPHAN_ITEM;
4923                 }
4924
4925                 if (!nbytes && !no_holes && extent_end < isize) {
4926                         err |= NBYTES_ERROR;
4927                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4928                               root->objectid, inode_id, isize);
4929                 }
4930
4931                 if (nbytes != extent_size) {
4932                         err |= NBYTES_ERROR;
4933                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4934                               root->objectid, inode_id, nbytes, extent_size);
4935                 }
4936         }
4937
4938         return err;
4939 }
4940
4941 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4942 {
4943         struct btrfs_path path;
4944         struct btrfs_key key;
4945         int err = 0;
4946         int ret;
4947
4948         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4949         key.type = BTRFS_INODE_ITEM_KEY;
4950         key.offset = 0;
4951
4952         /* For root being dropped, we don't need to check first inode */
4953         if (btrfs_root_refs(&root->root_item) == 0 &&
4954             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4955             key.objectid)
4956                 return 0;
4957
4958         btrfs_init_path(&path);
4959
4960         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4961         if (ret < 0)
4962                 goto out;
4963         if (ret > 0) {
4964                 ret = 0;
4965                 err |= INODE_ITEM_MISSING;
4966         }
4967
4968         err |= check_inode_item(root, &path, ext_ref);
4969         err &= ~LAST_ITEM;
4970         if (err && !ret)
4971                 ret = -EIO;
4972 out:
4973         btrfs_release_path(&path);
4974         return ret;
4975 }
4976
4977 /*
4978  * Iterate all item on the tree and call check_inode_item() to check.
4979  *
4980  * @root:       the root of the tree to be checked.
4981  * @ext_ref:    the EXTENDED_IREF feature
4982  *
4983  * Return 0 if no error found.
4984  * Return <0 for error.
4985  */
4986 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4987 {
4988         struct btrfs_path path;
4989         struct node_refs nrefs;
4990         struct btrfs_root_item *root_item = &root->root_item;
4991         int ret, wret;
4992         int level;
4993
4994         /*
4995          * We need to manually check the first inode item(256)
4996          * As the following traversal function will only start from
4997          * the first inode item in the leaf, if inode item(256) is missing
4998          * we will just skip it forever.
4999          */
5000         ret = check_fs_first_inode(root, ext_ref);
5001         if (ret < 0)
5002                 return ret;
5003
5004         memset(&nrefs, 0, sizeof(nrefs));
5005         level = btrfs_header_level(root->node);
5006         btrfs_init_path(&path);
5007
5008         if (btrfs_root_refs(root_item) > 0 ||
5009             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5010                 path.nodes[level] = root->node;
5011                 path.slots[level] = 0;
5012                 extent_buffer_get(root->node);
5013         } else {
5014                 struct btrfs_key key;
5015
5016                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5017                 level = root_item->drop_level;
5018                 path.lowest_level = level;
5019                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5020                 if (ret < 0)
5021                         goto out;
5022                 ret = 0;
5023         }
5024
5025         while (1) {
5026                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5027                 if (wret < 0)
5028                         ret = wret;
5029                 if (wret != 0)
5030                         break;
5031
5032                 wret = walk_up_tree_v2(root, &path, &level);
5033                 if (wret < 0)
5034                         ret = wret;
5035                 if (wret != 0)
5036                         break;
5037         }
5038
5039 out:
5040         btrfs_release_path(&path);
5041         return ret;
5042 }
5043
5044 /*
5045  * Find the relative ref for root_ref and root_backref.
5046  *
5047  * @root:       the root of the root tree.
5048  * @ref_key:    the key of the root ref.
5049  *
5050  * Return 0 if no error occurred.
5051  */
5052 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5053                           struct extent_buffer *node, int slot)
5054 {
5055         struct btrfs_path path;
5056         struct btrfs_key key;
5057         struct btrfs_root_ref *ref;
5058         struct btrfs_root_ref *backref;
5059         char ref_name[BTRFS_NAME_LEN] = {0};
5060         char backref_name[BTRFS_NAME_LEN] = {0};
5061         u64 ref_dirid;
5062         u64 ref_seq;
5063         u32 ref_namelen;
5064         u64 backref_dirid;
5065         u64 backref_seq;
5066         u32 backref_namelen;
5067         u32 len;
5068         int ret;
5069         int err = 0;
5070
5071         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5072         ref_dirid = btrfs_root_ref_dirid(node, ref);
5073         ref_seq = btrfs_root_ref_sequence(node, ref);
5074         ref_namelen = btrfs_root_ref_name_len(node, ref);
5075
5076         if (ref_namelen <= BTRFS_NAME_LEN) {
5077                 len = ref_namelen;
5078         } else {
5079                 len = BTRFS_NAME_LEN;
5080                 warning("%s[%llu %llu] ref_name too long",
5081                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5082                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5083                         ref_key->offset);
5084         }
5085         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5086
5087         /* Find relative root_ref */
5088         key.objectid = ref_key->offset;
5089         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5090         key.offset = ref_key->objectid;
5091
5092         btrfs_init_path(&path);
5093         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5094         if (ret) {
5095                 err |= ROOT_REF_MISSING;
5096                 error("%s[%llu %llu] couldn't find relative ref",
5097                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5098                       "ROOT_REF" : "ROOT_BACKREF",
5099                       ref_key->objectid, ref_key->offset);
5100                 goto out;
5101         }
5102
5103         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5104                                  struct btrfs_root_ref);
5105         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5106         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5107         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5108
5109         if (backref_namelen <= BTRFS_NAME_LEN) {
5110                 len = backref_namelen;
5111         } else {
5112                 len = BTRFS_NAME_LEN;
5113                 warning("%s[%llu %llu] ref_name too long",
5114                         key.type == BTRFS_ROOT_REF_KEY ?
5115                         "ROOT_REF" : "ROOT_BACKREF",
5116                         key.objectid, key.offset);
5117         }
5118         read_extent_buffer(path.nodes[0], backref_name,
5119                            (unsigned long)(backref + 1), len);
5120
5121         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5122             ref_namelen != backref_namelen ||
5123             strncmp(ref_name, backref_name, len)) {
5124                 err |= ROOT_REF_MISMATCH;
5125                 error("%s[%llu %llu] mismatch relative ref",
5126                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5127                       "ROOT_REF" : "ROOT_BACKREF",
5128                       ref_key->objectid, ref_key->offset);
5129         }
5130 out:
5131         btrfs_release_path(&path);
5132         return err;
5133 }
5134
5135 /*
5136  * Check all fs/file tree in low_memory mode.
5137  *
5138  * 1. for fs tree root item, call check_fs_root_v2()
5139  * 2. for fs tree root ref/backref, call check_root_ref()
5140  *
5141  * Return 0 if no error occurred.
5142  */
5143 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5144 {
5145         struct btrfs_root *tree_root = fs_info->tree_root;
5146         struct btrfs_root *cur_root = NULL;
5147         struct btrfs_path path;
5148         struct btrfs_key key;
5149         struct extent_buffer *node;
5150         unsigned int ext_ref;
5151         int slot;
5152         int ret;
5153         int err = 0;
5154
5155         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5156
5157         btrfs_init_path(&path);
5158         key.objectid = BTRFS_FS_TREE_OBJECTID;
5159         key.offset = 0;
5160         key.type = BTRFS_ROOT_ITEM_KEY;
5161
5162         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5163         if (ret < 0) {
5164                 err = ret;
5165                 goto out;
5166         } else if (ret > 0) {
5167                 err = -ENOENT;
5168                 goto out;
5169         }
5170
5171         while (1) {
5172                 node = path.nodes[0];
5173                 slot = path.slots[0];
5174                 btrfs_item_key_to_cpu(node, &key, slot);
5175                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5176                         goto out;
5177                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5178                     fs_root_objectid(key.objectid)) {
5179                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5180                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5181                                                                        &key);
5182                         } else {
5183                                 key.offset = (u64)-1;
5184                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5185                         }
5186
5187                         if (IS_ERR(cur_root)) {
5188                                 error("Fail to read fs/subvol tree: %lld",
5189                                       key.objectid);
5190                                 err = -EIO;
5191                                 goto next;
5192                         }
5193
5194                         ret = check_fs_root_v2(cur_root, ext_ref);
5195                         err |= ret;
5196
5197                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5198                                 btrfs_free_fs_root(cur_root);
5199                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5200                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5201                         ret = check_root_ref(tree_root, &key, node, slot);
5202                         err |= ret;
5203                 }
5204 next:
5205                 ret = btrfs_next_item(tree_root, &path);
5206                 if (ret > 0)
5207                         goto out;
5208                 if (ret < 0) {
5209                         err = ret;
5210                         goto out;
5211                 }
5212         }
5213
5214 out:
5215         btrfs_release_path(&path);
5216         return err;
5217 }
5218
5219 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5220 {
5221         struct list_head *cur = rec->backrefs.next;
5222         struct extent_backref *back;
5223         struct tree_backref *tback;
5224         struct data_backref *dback;
5225         u64 found = 0;
5226         int err = 0;
5227
5228         while(cur != &rec->backrefs) {
5229                 back = to_extent_backref(cur);
5230                 cur = cur->next;
5231                 if (!back->found_extent_tree) {
5232                         err = 1;
5233                         if (!print_errs)
5234                                 goto out;
5235                         if (back->is_data) {
5236                                 dback = to_data_backref(back);
5237                                 fprintf(stderr, "Backref %llu %s %llu"
5238                                         " owner %llu offset %llu num_refs %lu"
5239                                         " not found in extent tree\n",
5240                                         (unsigned long long)rec->start,
5241                                         back->full_backref ?
5242                                         "parent" : "root",
5243                                         back->full_backref ?
5244                                         (unsigned long long)dback->parent:
5245                                         (unsigned long long)dback->root,
5246                                         (unsigned long long)dback->owner,
5247                                         (unsigned long long)dback->offset,
5248                                         (unsigned long)dback->num_refs);
5249                         } else {
5250                                 tback = to_tree_backref(back);
5251                                 fprintf(stderr, "Backref %llu parent %llu"
5252                                         " root %llu not found in extent tree\n",
5253                                         (unsigned long long)rec->start,
5254                                         (unsigned long long)tback->parent,
5255                                         (unsigned long long)tback->root);
5256                         }
5257                 }
5258                 if (!back->is_data && !back->found_ref) {
5259                         err = 1;
5260                         if (!print_errs)
5261                                 goto out;
5262                         tback = to_tree_backref(back);
5263                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5264                                 (unsigned long long)rec->start,
5265                                 back->full_backref ? "parent" : "root",
5266                                 back->full_backref ?
5267                                 (unsigned long long)tback->parent :
5268                                 (unsigned long long)tback->root, back);
5269                 }
5270                 if (back->is_data) {
5271                         dback = to_data_backref(back);
5272                         if (dback->found_ref != dback->num_refs) {
5273                                 err = 1;
5274                                 if (!print_errs)
5275                                         goto out;
5276                                 fprintf(stderr, "Incorrect local backref count"
5277                                         " on %llu %s %llu owner %llu"
5278                                         " offset %llu found %u wanted %u back %p\n",
5279                                         (unsigned long long)rec->start,
5280                                         back->full_backref ?
5281                                         "parent" : "root",
5282                                         back->full_backref ?
5283                                         (unsigned long long)dback->parent:
5284                                         (unsigned long long)dback->root,
5285                                         (unsigned long long)dback->owner,
5286                                         (unsigned long long)dback->offset,
5287                                         dback->found_ref, dback->num_refs, back);
5288                         }
5289                         if (dback->disk_bytenr != rec->start) {
5290                                 err = 1;
5291                                 if (!print_errs)
5292                                         goto out;
5293                                 fprintf(stderr, "Backref disk bytenr does not"
5294                                         " match extent record, bytenr=%llu, "
5295                                         "ref bytenr=%llu\n",
5296                                         (unsigned long long)rec->start,
5297                                         (unsigned long long)dback->disk_bytenr);
5298                         }
5299
5300                         if (dback->bytes != rec->nr) {
5301                                 err = 1;
5302                                 if (!print_errs)
5303                                         goto out;
5304                                 fprintf(stderr, "Backref bytes do not match "
5305                                         "extent backref, bytenr=%llu, ref "
5306                                         "bytes=%llu, backref bytes=%llu\n",
5307                                         (unsigned long long)rec->start,
5308                                         (unsigned long long)rec->nr,
5309                                         (unsigned long long)dback->bytes);
5310                         }
5311                 }
5312                 if (!back->is_data) {
5313                         found += 1;
5314                 } else {
5315                         dback = to_data_backref(back);
5316                         found += dback->found_ref;
5317                 }
5318         }
5319         if (found != rec->refs) {
5320                 err = 1;
5321                 if (!print_errs)
5322                         goto out;
5323                 fprintf(stderr, "Incorrect global backref count "
5324                         "on %llu found %llu wanted %llu\n",
5325                         (unsigned long long)rec->start,
5326                         (unsigned long long)found,
5327                         (unsigned long long)rec->refs);
5328         }
5329 out:
5330         return err;
5331 }
5332
5333 static int free_all_extent_backrefs(struct extent_record *rec)
5334 {
5335         struct extent_backref *back;
5336         struct list_head *cur;
5337         while (!list_empty(&rec->backrefs)) {
5338                 cur = rec->backrefs.next;
5339                 back = to_extent_backref(cur);
5340                 list_del(cur);
5341                 free(back);
5342         }
5343         return 0;
5344 }
5345
5346 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5347                                      struct cache_tree *extent_cache)
5348 {
5349         struct cache_extent *cache;
5350         struct extent_record *rec;
5351
5352         while (1) {
5353                 cache = first_cache_extent(extent_cache);
5354                 if (!cache)
5355                         break;
5356                 rec = container_of(cache, struct extent_record, cache);
5357                 remove_cache_extent(extent_cache, cache);
5358                 free_all_extent_backrefs(rec);
5359                 free(rec);
5360         }
5361 }
5362
5363 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5364                                  struct extent_record *rec)
5365 {
5366         if (rec->content_checked && rec->owner_ref_checked &&
5367             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5368             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5369             !rec->bad_full_backref && !rec->crossing_stripes &&
5370             !rec->wrong_chunk_type) {
5371                 remove_cache_extent(extent_cache, &rec->cache);
5372                 free_all_extent_backrefs(rec);
5373                 list_del_init(&rec->list);
5374                 free(rec);
5375         }
5376         return 0;
5377 }
5378
5379 static int check_owner_ref(struct btrfs_root *root,
5380                             struct extent_record *rec,
5381                             struct extent_buffer *buf)
5382 {
5383         struct extent_backref *node;
5384         struct tree_backref *back;
5385         struct btrfs_root *ref_root;
5386         struct btrfs_key key;
5387         struct btrfs_path path;
5388         struct extent_buffer *parent;
5389         int level;
5390         int found = 0;
5391         int ret;
5392
5393         list_for_each_entry(node, &rec->backrefs, list) {
5394                 if (node->is_data)
5395                         continue;
5396                 if (!node->found_ref)
5397                         continue;
5398                 if (node->full_backref)
5399                         continue;
5400                 back = to_tree_backref(node);
5401                 if (btrfs_header_owner(buf) == back->root)
5402                         return 0;
5403         }
5404         BUG_ON(rec->is_root);
5405
5406         /* try to find the block by search corresponding fs tree */
5407         key.objectid = btrfs_header_owner(buf);
5408         key.type = BTRFS_ROOT_ITEM_KEY;
5409         key.offset = (u64)-1;
5410
5411         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5412         if (IS_ERR(ref_root))
5413                 return 1;
5414
5415         level = btrfs_header_level(buf);
5416         if (level == 0)
5417                 btrfs_item_key_to_cpu(buf, &key, 0);
5418         else
5419                 btrfs_node_key_to_cpu(buf, &key, 0);
5420
5421         btrfs_init_path(&path);
5422         path.lowest_level = level + 1;
5423         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5424         if (ret < 0)
5425                 return 0;
5426
5427         parent = path.nodes[level + 1];
5428         if (parent && buf->start == btrfs_node_blockptr(parent,
5429                                                         path.slots[level + 1]))
5430                 found = 1;
5431
5432         btrfs_release_path(&path);
5433         return found ? 0 : 1;
5434 }
5435
5436 static int is_extent_tree_record(struct extent_record *rec)
5437 {
5438         struct list_head *cur = rec->backrefs.next;
5439         struct extent_backref *node;
5440         struct tree_backref *back;
5441         int is_extent = 0;
5442
5443         while(cur != &rec->backrefs) {
5444                 node = to_extent_backref(cur);
5445                 cur = cur->next;
5446                 if (node->is_data)
5447                         return 0;
5448                 back = to_tree_backref(node);
5449                 if (node->full_backref)
5450                         return 0;
5451                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5452                         is_extent = 1;
5453         }
5454         return is_extent;
5455 }
5456
5457
5458 static int record_bad_block_io(struct btrfs_fs_info *info,
5459                                struct cache_tree *extent_cache,
5460                                u64 start, u64 len)
5461 {
5462         struct extent_record *rec;
5463         struct cache_extent *cache;
5464         struct btrfs_key key;
5465
5466         cache = lookup_cache_extent(extent_cache, start, len);
5467         if (!cache)
5468                 return 0;
5469
5470         rec = container_of(cache, struct extent_record, cache);
5471         if (!is_extent_tree_record(rec))
5472                 return 0;
5473
5474         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5475         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5476 }
5477
5478 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5479                        struct extent_buffer *buf, int slot)
5480 {
5481         if (btrfs_header_level(buf)) {
5482                 struct btrfs_key_ptr ptr1, ptr2;
5483
5484                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5485                                    sizeof(struct btrfs_key_ptr));
5486                 read_extent_buffer(buf, &ptr2,
5487                                    btrfs_node_key_ptr_offset(slot + 1),
5488                                    sizeof(struct btrfs_key_ptr));
5489                 write_extent_buffer(buf, &ptr1,
5490                                     btrfs_node_key_ptr_offset(slot + 1),
5491                                     sizeof(struct btrfs_key_ptr));
5492                 write_extent_buffer(buf, &ptr2,
5493                                     btrfs_node_key_ptr_offset(slot),
5494                                     sizeof(struct btrfs_key_ptr));
5495                 if (slot == 0) {
5496                         struct btrfs_disk_key key;
5497                         btrfs_node_key(buf, &key, 0);
5498                         btrfs_fixup_low_keys(root, path, &key,
5499                                              btrfs_header_level(buf) + 1);
5500                 }
5501         } else {
5502                 struct btrfs_item *item1, *item2;
5503                 struct btrfs_key k1, k2;
5504                 char *item1_data, *item2_data;
5505                 u32 item1_offset, item2_offset, item1_size, item2_size;
5506
5507                 item1 = btrfs_item_nr(slot);
5508                 item2 = btrfs_item_nr(slot + 1);
5509                 btrfs_item_key_to_cpu(buf, &k1, slot);
5510                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5511                 item1_offset = btrfs_item_offset(buf, item1);
5512                 item2_offset = btrfs_item_offset(buf, item2);
5513                 item1_size = btrfs_item_size(buf, item1);
5514                 item2_size = btrfs_item_size(buf, item2);
5515
5516                 item1_data = malloc(item1_size);
5517                 if (!item1_data)
5518                         return -ENOMEM;
5519                 item2_data = malloc(item2_size);
5520                 if (!item2_data) {
5521                         free(item1_data);
5522                         return -ENOMEM;
5523                 }
5524
5525                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5526                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5527
5528                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5529                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5530                 free(item1_data);
5531                 free(item2_data);
5532
5533                 btrfs_set_item_offset(buf, item1, item2_offset);
5534                 btrfs_set_item_offset(buf, item2, item1_offset);
5535                 btrfs_set_item_size(buf, item1, item2_size);
5536                 btrfs_set_item_size(buf, item2, item1_size);
5537
5538                 path->slots[0] = slot;
5539                 btrfs_set_item_key_unsafe(root, path, &k2);
5540                 path->slots[0] = slot + 1;
5541                 btrfs_set_item_key_unsafe(root, path, &k1);
5542         }
5543         return 0;
5544 }
5545
5546 static int fix_key_order(struct btrfs_trans_handle *trans,
5547                          struct btrfs_root *root,
5548                          struct btrfs_path *path)
5549 {
5550         struct extent_buffer *buf;
5551         struct btrfs_key k1, k2;
5552         int i;
5553         int level = path->lowest_level;
5554         int ret = -EIO;
5555
5556         buf = path->nodes[level];
5557         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5558                 if (level) {
5559                         btrfs_node_key_to_cpu(buf, &k1, i);
5560                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5561                 } else {
5562                         btrfs_item_key_to_cpu(buf, &k1, i);
5563                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5564                 }
5565                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5566                         continue;
5567                 ret = swap_values(root, path, buf, i);
5568                 if (ret)
5569                         break;
5570                 btrfs_mark_buffer_dirty(buf);
5571                 i = 0;
5572         }
5573         return ret;
5574 }
5575
5576 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5577                              struct btrfs_root *root,
5578                              struct btrfs_path *path,
5579                              struct extent_buffer *buf, int slot)
5580 {
5581         struct btrfs_key key;
5582         int nritems = btrfs_header_nritems(buf);
5583
5584         btrfs_item_key_to_cpu(buf, &key, slot);
5585
5586         /* These are all the keys we can deal with missing. */
5587         if (key.type != BTRFS_DIR_INDEX_KEY &&
5588             key.type != BTRFS_EXTENT_ITEM_KEY &&
5589             key.type != BTRFS_METADATA_ITEM_KEY &&
5590             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5591             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5592                 return -1;
5593
5594         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5595                (unsigned long long)key.objectid, key.type,
5596                (unsigned long long)key.offset, slot, buf->start);
5597         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5598                               btrfs_item_nr_offset(slot + 1),
5599                               sizeof(struct btrfs_item) *
5600                               (nritems - slot - 1));
5601         btrfs_set_header_nritems(buf, nritems - 1);
5602         if (slot == 0) {
5603                 struct btrfs_disk_key disk_key;
5604
5605                 btrfs_item_key(buf, &disk_key, 0);
5606                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5607         }
5608         btrfs_mark_buffer_dirty(buf);
5609         return 0;
5610 }
5611
5612 static int fix_item_offset(struct btrfs_trans_handle *trans,
5613                            struct btrfs_root *root,
5614                            struct btrfs_path *path)
5615 {
5616         struct extent_buffer *buf;
5617         int i;
5618         int ret = 0;
5619
5620         /* We should only get this for leaves */
5621         BUG_ON(path->lowest_level);
5622         buf = path->nodes[0];
5623 again:
5624         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5625                 unsigned int shift = 0, offset;
5626
5627                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5628                     BTRFS_LEAF_DATA_SIZE(root)) {
5629                         if (btrfs_item_end_nr(buf, i) >
5630                             BTRFS_LEAF_DATA_SIZE(root)) {
5631                                 ret = delete_bogus_item(trans, root, path,
5632                                                         buf, i);
5633                                 if (!ret)
5634                                         goto again;
5635                                 fprintf(stderr, "item is off the end of the "
5636                                         "leaf, can't fix\n");
5637                                 ret = -EIO;
5638                                 break;
5639                         }
5640                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5641                                 btrfs_item_end_nr(buf, i);
5642                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5643                            btrfs_item_offset_nr(buf, i - 1)) {
5644                         if (btrfs_item_end_nr(buf, i) >
5645                             btrfs_item_offset_nr(buf, i - 1)) {
5646                                 ret = delete_bogus_item(trans, root, path,
5647                                                         buf, i);
5648                                 if (!ret)
5649                                         goto again;
5650                                 fprintf(stderr, "items overlap, can't fix\n");
5651                                 ret = -EIO;
5652                                 break;
5653                         }
5654                         shift = btrfs_item_offset_nr(buf, i - 1) -
5655                                 btrfs_item_end_nr(buf, i);
5656                 }
5657                 if (!shift)
5658                         continue;
5659
5660                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5661                        i, shift, (unsigned long long)buf->start);
5662                 offset = btrfs_item_offset_nr(buf, i);
5663                 memmove_extent_buffer(buf,
5664                                       btrfs_leaf_data(buf) + offset + shift,
5665                                       btrfs_leaf_data(buf) + offset,
5666                                       btrfs_item_size_nr(buf, i));
5667                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5668                                       offset + shift);
5669                 btrfs_mark_buffer_dirty(buf);
5670         }
5671
5672         /*
5673          * We may have moved things, in which case we want to exit so we don't
5674          * write those changes out.  Once we have proper abort functionality in
5675          * progs this can be changed to something nicer.
5676          */
5677         BUG_ON(ret);
5678         return ret;
5679 }
5680
5681 /*
5682  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5683  * then just return -EIO.
5684  */
5685 static int try_to_fix_bad_block(struct btrfs_root *root,
5686                                 struct extent_buffer *buf,
5687                                 enum btrfs_tree_block_status status)
5688 {
5689         struct btrfs_trans_handle *trans;
5690         struct ulist *roots;
5691         struct ulist_node *node;
5692         struct btrfs_root *search_root;
5693         struct btrfs_path path;
5694         struct ulist_iterator iter;
5695         struct btrfs_key root_key, key;
5696         int ret;
5697
5698         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5699             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5700                 return -EIO;
5701
5702         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5703         if (ret)
5704                 return -EIO;
5705
5706         btrfs_init_path(&path);
5707         ULIST_ITER_INIT(&iter);
5708         while ((node = ulist_next(roots, &iter))) {
5709                 root_key.objectid = node->val;
5710                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5711                 root_key.offset = (u64)-1;
5712
5713                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5714                 if (IS_ERR(root)) {
5715                         ret = -EIO;
5716                         break;
5717                 }
5718
5719
5720                 trans = btrfs_start_transaction(search_root, 0);
5721                 if (IS_ERR(trans)) {
5722                         ret = PTR_ERR(trans);
5723                         break;
5724                 }
5725
5726                 path.lowest_level = btrfs_header_level(buf);
5727                 path.skip_check_block = 1;
5728                 if (path.lowest_level)
5729                         btrfs_node_key_to_cpu(buf, &key, 0);
5730                 else
5731                         btrfs_item_key_to_cpu(buf, &key, 0);
5732                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5733                 if (ret) {
5734                         ret = -EIO;
5735                         btrfs_commit_transaction(trans, search_root);
5736                         break;
5737                 }
5738                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5739                         ret = fix_key_order(trans, search_root, &path);
5740                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5741                         ret = fix_item_offset(trans, search_root, &path);
5742                 if (ret) {
5743                         btrfs_commit_transaction(trans, search_root);
5744                         break;
5745                 }
5746                 btrfs_release_path(&path);
5747                 btrfs_commit_transaction(trans, search_root);
5748         }
5749         ulist_free(roots);
5750         btrfs_release_path(&path);
5751         return ret;
5752 }
5753
5754 static int check_block(struct btrfs_root *root,
5755                        struct cache_tree *extent_cache,
5756                        struct extent_buffer *buf, u64 flags)
5757 {
5758         struct extent_record *rec;
5759         struct cache_extent *cache;
5760         struct btrfs_key key;
5761         enum btrfs_tree_block_status status;
5762         int ret = 0;
5763         int level;
5764
5765         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5766         if (!cache)
5767                 return 1;
5768         rec = container_of(cache, struct extent_record, cache);
5769         rec->generation = btrfs_header_generation(buf);
5770
5771         level = btrfs_header_level(buf);
5772         if (btrfs_header_nritems(buf) > 0) {
5773
5774                 if (level == 0)
5775                         btrfs_item_key_to_cpu(buf, &key, 0);
5776                 else
5777                         btrfs_node_key_to_cpu(buf, &key, 0);
5778
5779                 rec->info_objectid = key.objectid;
5780         }
5781         rec->info_level = level;
5782
5783         if (btrfs_is_leaf(buf))
5784                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5785         else
5786                 status = btrfs_check_node(root, &rec->parent_key, buf);
5787
5788         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5789                 if (repair)
5790                         status = try_to_fix_bad_block(root, buf, status);
5791                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5792                         ret = -EIO;
5793                         fprintf(stderr, "bad block %llu\n",
5794                                 (unsigned long long)buf->start);
5795                 } else {
5796                         /*
5797                          * Signal to callers we need to start the scan over
5798                          * again since we'll have cowed blocks.
5799                          */
5800                         ret = -EAGAIN;
5801                 }
5802         } else {
5803                 rec->content_checked = 1;
5804                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5805                         rec->owner_ref_checked = 1;
5806                 else {
5807                         ret = check_owner_ref(root, rec, buf);
5808                         if (!ret)
5809                                 rec->owner_ref_checked = 1;
5810                 }
5811         }
5812         if (!ret)
5813                 maybe_free_extent_rec(extent_cache, rec);
5814         return ret;
5815 }
5816
5817 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5818                                                 u64 parent, u64 root)
5819 {
5820         struct list_head *cur = rec->backrefs.next;
5821         struct extent_backref *node;
5822         struct tree_backref *back;
5823
5824         while(cur != &rec->backrefs) {
5825                 node = to_extent_backref(cur);
5826                 cur = cur->next;
5827                 if (node->is_data)
5828                         continue;
5829                 back = to_tree_backref(node);
5830                 if (parent > 0) {
5831                         if (!node->full_backref)
5832                                 continue;
5833                         if (parent == back->parent)
5834                                 return back;
5835                 } else {
5836                         if (node->full_backref)
5837                                 continue;
5838                         if (back->root == root)
5839                                 return back;
5840                 }
5841         }
5842         return NULL;
5843 }
5844
5845 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5846                                                 u64 parent, u64 root)
5847 {
5848         struct tree_backref *ref = malloc(sizeof(*ref));
5849
5850         if (!ref)
5851                 return NULL;
5852         memset(&ref->node, 0, sizeof(ref->node));
5853         if (parent > 0) {
5854                 ref->parent = parent;
5855                 ref->node.full_backref = 1;
5856         } else {
5857                 ref->root = root;
5858                 ref->node.full_backref = 0;
5859         }
5860         list_add_tail(&ref->node.list, &rec->backrefs);
5861
5862         return ref;
5863 }
5864
5865 static struct data_backref *find_data_backref(struct extent_record *rec,
5866                                                 u64 parent, u64 root,
5867                                                 u64 owner, u64 offset,
5868                                                 int found_ref,
5869                                                 u64 disk_bytenr, u64 bytes)
5870 {
5871         struct list_head *cur = rec->backrefs.next;
5872         struct extent_backref *node;
5873         struct data_backref *back;
5874
5875         while(cur != &rec->backrefs) {
5876                 node = to_extent_backref(cur);
5877                 cur = cur->next;
5878                 if (!node->is_data)
5879                         continue;
5880                 back = to_data_backref(node);
5881                 if (parent > 0) {
5882                         if (!node->full_backref)
5883                                 continue;
5884                         if (parent == back->parent)
5885                                 return back;
5886                 } else {
5887                         if (node->full_backref)
5888                                 continue;
5889                         if (back->root == root && back->owner == owner &&
5890                             back->offset == offset) {
5891                                 if (found_ref && node->found_ref &&
5892                                     (back->bytes != bytes ||
5893                                     back->disk_bytenr != disk_bytenr))
5894                                         continue;
5895                                 return back;
5896                         }
5897                 }
5898         }
5899         return NULL;
5900 }
5901
5902 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5903                                                 u64 parent, u64 root,
5904                                                 u64 owner, u64 offset,
5905                                                 u64 max_size)
5906 {
5907         struct data_backref *ref = malloc(sizeof(*ref));
5908
5909         if (!ref)
5910                 return NULL;
5911         memset(&ref->node, 0, sizeof(ref->node));
5912         ref->node.is_data = 1;
5913
5914         if (parent > 0) {
5915                 ref->parent = parent;
5916                 ref->owner = 0;
5917                 ref->offset = 0;
5918                 ref->node.full_backref = 1;
5919         } else {
5920                 ref->root = root;
5921                 ref->owner = owner;
5922                 ref->offset = offset;
5923                 ref->node.full_backref = 0;
5924         }
5925         ref->bytes = max_size;
5926         ref->found_ref = 0;
5927         ref->num_refs = 0;
5928         list_add_tail(&ref->node.list, &rec->backrefs);
5929         if (max_size > rec->max_size)
5930                 rec->max_size = max_size;
5931         return ref;
5932 }
5933
5934 /* Check if the type of extent matches with its chunk */
5935 static void check_extent_type(struct extent_record *rec)
5936 {
5937         struct btrfs_block_group_cache *bg_cache;
5938
5939         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5940         if (!bg_cache)
5941                 return;
5942
5943         /* data extent, check chunk directly*/
5944         if (!rec->metadata) {
5945                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5946                         rec->wrong_chunk_type = 1;
5947                 return;
5948         }
5949
5950         /* metadata extent, check the obvious case first */
5951         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5952                                  BTRFS_BLOCK_GROUP_METADATA))) {
5953                 rec->wrong_chunk_type = 1;
5954                 return;
5955         }
5956
5957         /*
5958          * Check SYSTEM extent, as it's also marked as metadata, we can only
5959          * make sure it's a SYSTEM extent by its backref
5960          */
5961         if (!list_empty(&rec->backrefs)) {
5962                 struct extent_backref *node;
5963                 struct tree_backref *tback;
5964                 u64 bg_type;
5965
5966                 node = to_extent_backref(rec->backrefs.next);
5967                 if (node->is_data) {
5968                         /* tree block shouldn't have data backref */
5969                         rec->wrong_chunk_type = 1;
5970                         return;
5971                 }
5972                 tback = container_of(node, struct tree_backref, node);
5973
5974                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5975                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5976                 else
5977                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5978                 if (!(bg_cache->flags & bg_type))
5979                         rec->wrong_chunk_type = 1;
5980         }
5981 }
5982
5983 /*
5984  * Allocate a new extent record, fill default values from @tmpl and insert int
5985  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5986  * the cache, otherwise it fails.
5987  */
5988 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5989                 struct extent_record *tmpl)
5990 {
5991         struct extent_record *rec;
5992         int ret = 0;
5993
5994         rec = malloc(sizeof(*rec));
5995         if (!rec)
5996                 return -ENOMEM;
5997         rec->start = tmpl->start;
5998         rec->max_size = tmpl->max_size;
5999         rec->nr = max(tmpl->nr, tmpl->max_size);
6000         rec->found_rec = tmpl->found_rec;
6001         rec->content_checked = tmpl->content_checked;
6002         rec->owner_ref_checked = tmpl->owner_ref_checked;
6003         rec->num_duplicates = 0;
6004         rec->metadata = tmpl->metadata;
6005         rec->flag_block_full_backref = FLAG_UNSET;
6006         rec->bad_full_backref = 0;
6007         rec->crossing_stripes = 0;
6008         rec->wrong_chunk_type = 0;
6009         rec->is_root = tmpl->is_root;
6010         rec->refs = tmpl->refs;
6011         rec->extent_item_refs = tmpl->extent_item_refs;
6012         rec->parent_generation = tmpl->parent_generation;
6013         INIT_LIST_HEAD(&rec->backrefs);
6014         INIT_LIST_HEAD(&rec->dups);
6015         INIT_LIST_HEAD(&rec->list);
6016         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6017         rec->cache.start = tmpl->start;
6018         rec->cache.size = tmpl->nr;
6019         ret = insert_cache_extent(extent_cache, &rec->cache);
6020         if (ret) {
6021                 free(rec);
6022                 return ret;
6023         }
6024         bytes_used += rec->nr;
6025
6026         if (tmpl->metadata)
6027                 rec->crossing_stripes = check_crossing_stripes(global_info,
6028                                 rec->start, global_info->tree_root->nodesize);
6029         check_extent_type(rec);
6030         return ret;
6031 }
6032
6033 /*
6034  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6035  * some are hints:
6036  * - refs              - if found, increase refs
6037  * - is_root           - if found, set
6038  * - content_checked   - if found, set
6039  * - owner_ref_checked - if found, set
6040  *
6041  * If not found, create a new one, initialize and insert.
6042  */
6043 static int add_extent_rec(struct cache_tree *extent_cache,
6044                 struct extent_record *tmpl)
6045 {
6046         struct extent_record *rec;
6047         struct cache_extent *cache;
6048         int ret = 0;
6049         int dup = 0;
6050
6051         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6052         if (cache) {
6053                 rec = container_of(cache, struct extent_record, cache);
6054                 if (tmpl->refs)
6055                         rec->refs++;
6056                 if (rec->nr == 1)
6057                         rec->nr = max(tmpl->nr, tmpl->max_size);
6058
6059                 /*
6060                  * We need to make sure to reset nr to whatever the extent
6061                  * record says was the real size, this way we can compare it to
6062                  * the backrefs.
6063                  */
6064                 if (tmpl->found_rec) {
6065                         if (tmpl->start != rec->start || rec->found_rec) {
6066                                 struct extent_record *tmp;
6067
6068                                 dup = 1;
6069                                 if (list_empty(&rec->list))
6070                                         list_add_tail(&rec->list,
6071                                                       &duplicate_extents);
6072
6073                                 /*
6074                                  * We have to do this song and dance in case we
6075                                  * find an extent record that falls inside of
6076                                  * our current extent record but does not have
6077                                  * the same objectid.
6078                                  */
6079                                 tmp = malloc(sizeof(*tmp));
6080                                 if (!tmp)
6081                                         return -ENOMEM;
6082                                 tmp->start = tmpl->start;
6083                                 tmp->max_size = tmpl->max_size;
6084                                 tmp->nr = tmpl->nr;
6085                                 tmp->found_rec = 1;
6086                                 tmp->metadata = tmpl->metadata;
6087                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6088                                 INIT_LIST_HEAD(&tmp->list);
6089                                 list_add_tail(&tmp->list, &rec->dups);
6090                                 rec->num_duplicates++;
6091                         } else {
6092                                 rec->nr = tmpl->nr;
6093                                 rec->found_rec = 1;
6094                         }
6095                 }
6096
6097                 if (tmpl->extent_item_refs && !dup) {
6098                         if (rec->extent_item_refs) {
6099                                 fprintf(stderr, "block %llu rec "
6100                                         "extent_item_refs %llu, passed %llu\n",
6101                                         (unsigned long long)tmpl->start,
6102                                         (unsigned long long)
6103                                                         rec->extent_item_refs,
6104                                         (unsigned long long)tmpl->extent_item_refs);
6105                         }
6106                         rec->extent_item_refs = tmpl->extent_item_refs;
6107                 }
6108                 if (tmpl->is_root)
6109                         rec->is_root = 1;
6110                 if (tmpl->content_checked)
6111                         rec->content_checked = 1;
6112                 if (tmpl->owner_ref_checked)
6113                         rec->owner_ref_checked = 1;
6114                 memcpy(&rec->parent_key, &tmpl->parent_key,
6115                                 sizeof(tmpl->parent_key));
6116                 if (tmpl->parent_generation)
6117                         rec->parent_generation = tmpl->parent_generation;
6118                 if (rec->max_size < tmpl->max_size)
6119                         rec->max_size = tmpl->max_size;
6120
6121                 /*
6122                  * A metadata extent can't cross stripe_len boundary, otherwise
6123                  * kernel scrub won't be able to handle it.
6124                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6125                  * it.
6126                  */
6127                 if (tmpl->metadata)
6128                         rec->crossing_stripes = check_crossing_stripes(
6129                                         global_info, rec->start,
6130                                         global_info->tree_root->nodesize);
6131                 check_extent_type(rec);
6132                 maybe_free_extent_rec(extent_cache, rec);
6133                 return ret;
6134         }
6135
6136         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6137
6138         return ret;
6139 }
6140
6141 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6142                             u64 parent, u64 root, int found_ref)
6143 {
6144         struct extent_record *rec;
6145         struct tree_backref *back;
6146         struct cache_extent *cache;
6147         int ret;
6148
6149         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6150         if (!cache) {
6151                 struct extent_record tmpl;
6152
6153                 memset(&tmpl, 0, sizeof(tmpl));
6154                 tmpl.start = bytenr;
6155                 tmpl.nr = 1;
6156                 tmpl.metadata = 1;
6157
6158                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6159                 if (ret)
6160                         return ret;
6161
6162                 /* really a bug in cache_extent implement now */
6163                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6164                 if (!cache)
6165                         return -ENOENT;
6166         }
6167
6168         rec = container_of(cache, struct extent_record, cache);
6169         if (rec->start != bytenr) {
6170                 /*
6171                  * Several cause, from unaligned bytenr to over lapping extents
6172                  */
6173                 return -EEXIST;
6174         }
6175
6176         back = find_tree_backref(rec, parent, root);
6177         if (!back) {
6178                 back = alloc_tree_backref(rec, parent, root);
6179                 if (!back)
6180                         return -ENOMEM;
6181         }
6182
6183         if (found_ref) {
6184                 if (back->node.found_ref) {
6185                         fprintf(stderr, "Extent back ref already exists "
6186                                 "for %llu parent %llu root %llu \n",
6187                                 (unsigned long long)bytenr,
6188                                 (unsigned long long)parent,
6189                                 (unsigned long long)root);
6190                 }
6191                 back->node.found_ref = 1;
6192         } else {
6193                 if (back->node.found_extent_tree) {
6194                         fprintf(stderr, "Extent back ref already exists "
6195                                 "for %llu parent %llu root %llu \n",
6196                                 (unsigned long long)bytenr,
6197                                 (unsigned long long)parent,
6198                                 (unsigned long long)root);
6199                 }
6200                 back->node.found_extent_tree = 1;
6201         }
6202         check_extent_type(rec);
6203         maybe_free_extent_rec(extent_cache, rec);
6204         return 0;
6205 }
6206
6207 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6208                             u64 parent, u64 root, u64 owner, u64 offset,
6209                             u32 num_refs, int found_ref, u64 max_size)
6210 {
6211         struct extent_record *rec;
6212         struct data_backref *back;
6213         struct cache_extent *cache;
6214         int ret;
6215
6216         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6217         if (!cache) {
6218                 struct extent_record tmpl;
6219
6220                 memset(&tmpl, 0, sizeof(tmpl));
6221                 tmpl.start = bytenr;
6222                 tmpl.nr = 1;
6223                 tmpl.max_size = max_size;
6224
6225                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6226                 if (ret)
6227                         return ret;
6228
6229                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6230                 if (!cache)
6231                         abort();
6232         }
6233
6234         rec = container_of(cache, struct extent_record, cache);
6235         if (rec->max_size < max_size)
6236                 rec->max_size = max_size;
6237
6238         /*
6239          * If found_ref is set then max_size is the real size and must match the
6240          * existing refs.  So if we have already found a ref then we need to
6241          * make sure that this ref matches the existing one, otherwise we need
6242          * to add a new backref so we can notice that the backrefs don't match
6243          * and we need to figure out who is telling the truth.  This is to
6244          * account for that awful fsync bug I introduced where we'd end up with
6245          * a btrfs_file_extent_item that would have its length include multiple
6246          * prealloc extents or point inside of a prealloc extent.
6247          */
6248         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6249                                  bytenr, max_size);
6250         if (!back) {
6251                 back = alloc_data_backref(rec, parent, root, owner, offset,
6252                                           max_size);
6253                 BUG_ON(!back);
6254         }
6255
6256         if (found_ref) {
6257                 BUG_ON(num_refs != 1);
6258                 if (back->node.found_ref)
6259                         BUG_ON(back->bytes != max_size);
6260                 back->node.found_ref = 1;
6261                 back->found_ref += 1;
6262                 back->bytes = max_size;
6263                 back->disk_bytenr = bytenr;
6264                 rec->refs += 1;
6265                 rec->content_checked = 1;
6266                 rec->owner_ref_checked = 1;
6267         } else {
6268                 if (back->node.found_extent_tree) {
6269                         fprintf(stderr, "Extent back ref already exists "
6270                                 "for %llu parent %llu root %llu "
6271                                 "owner %llu offset %llu num_refs %lu\n",
6272                                 (unsigned long long)bytenr,
6273                                 (unsigned long long)parent,
6274                                 (unsigned long long)root,
6275                                 (unsigned long long)owner,
6276                                 (unsigned long long)offset,
6277                                 (unsigned long)num_refs);
6278                 }
6279                 back->num_refs = num_refs;
6280                 back->node.found_extent_tree = 1;
6281         }
6282         maybe_free_extent_rec(extent_cache, rec);
6283         return 0;
6284 }
6285
6286 static int add_pending(struct cache_tree *pending,
6287                        struct cache_tree *seen, u64 bytenr, u32 size)
6288 {
6289         int ret;
6290         ret = add_cache_extent(seen, bytenr, size);
6291         if (ret)
6292                 return ret;
6293         add_cache_extent(pending, bytenr, size);
6294         return 0;
6295 }
6296
6297 static int pick_next_pending(struct cache_tree *pending,
6298                         struct cache_tree *reada,
6299                         struct cache_tree *nodes,
6300                         u64 last, struct block_info *bits, int bits_nr,
6301                         int *reada_bits)
6302 {
6303         unsigned long node_start = last;
6304         struct cache_extent *cache;
6305         int ret;
6306
6307         cache = search_cache_extent(reada, 0);
6308         if (cache) {
6309                 bits[0].start = cache->start;
6310                 bits[0].size = cache->size;
6311                 *reada_bits = 1;
6312                 return 1;
6313         }
6314         *reada_bits = 0;
6315         if (node_start > 32768)
6316                 node_start -= 32768;
6317
6318         cache = search_cache_extent(nodes, node_start);
6319         if (!cache)
6320                 cache = search_cache_extent(nodes, 0);
6321
6322         if (!cache) {
6323                  cache = search_cache_extent(pending, 0);
6324                  if (!cache)
6325                          return 0;
6326                  ret = 0;
6327                  do {
6328                          bits[ret].start = cache->start;
6329                          bits[ret].size = cache->size;
6330                          cache = next_cache_extent(cache);
6331                          ret++;
6332                  } while (cache && ret < bits_nr);
6333                  return ret;
6334         }
6335
6336         ret = 0;
6337         do {
6338                 bits[ret].start = cache->start;
6339                 bits[ret].size = cache->size;
6340                 cache = next_cache_extent(cache);
6341                 ret++;
6342         } while (cache && ret < bits_nr);
6343
6344         if (bits_nr - ret > 8) {
6345                 u64 lookup = bits[0].start + bits[0].size;
6346                 struct cache_extent *next;
6347                 next = search_cache_extent(pending, lookup);
6348                 while(next) {
6349                         if (next->start - lookup > 32768)
6350                                 break;
6351                         bits[ret].start = next->start;
6352                         bits[ret].size = next->size;
6353                         lookup = next->start + next->size;
6354                         ret++;
6355                         if (ret == bits_nr)
6356                                 break;
6357                         next = next_cache_extent(next);
6358                         if (!next)
6359                                 break;
6360                 }
6361         }
6362         return ret;
6363 }
6364
6365 static void free_chunk_record(struct cache_extent *cache)
6366 {
6367         struct chunk_record *rec;
6368
6369         rec = container_of(cache, struct chunk_record, cache);
6370         list_del_init(&rec->list);
6371         list_del_init(&rec->dextents);
6372         free(rec);
6373 }
6374
6375 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6376 {
6377         cache_tree_free_extents(chunk_cache, free_chunk_record);
6378 }
6379
6380 static void free_device_record(struct rb_node *node)
6381 {
6382         struct device_record *rec;
6383
6384         rec = container_of(node, struct device_record, node);
6385         free(rec);
6386 }
6387
6388 FREE_RB_BASED_TREE(device_cache, free_device_record);
6389
6390 int insert_block_group_record(struct block_group_tree *tree,
6391                               struct block_group_record *bg_rec)
6392 {
6393         int ret;
6394
6395         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6396         if (ret)
6397                 return ret;
6398
6399         list_add_tail(&bg_rec->list, &tree->block_groups);
6400         return 0;
6401 }
6402
6403 static void free_block_group_record(struct cache_extent *cache)
6404 {
6405         struct block_group_record *rec;
6406
6407         rec = container_of(cache, struct block_group_record, cache);
6408         list_del_init(&rec->list);
6409         free(rec);
6410 }
6411
6412 void free_block_group_tree(struct block_group_tree *tree)
6413 {
6414         cache_tree_free_extents(&tree->tree, free_block_group_record);
6415 }
6416
6417 int insert_device_extent_record(struct device_extent_tree *tree,
6418                                 struct device_extent_record *de_rec)
6419 {
6420         int ret;
6421
6422         /*
6423          * Device extent is a bit different from the other extents, because
6424          * the extents which belong to the different devices may have the
6425          * same start and size, so we need use the special extent cache
6426          * search/insert functions.
6427          */
6428         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6429         if (ret)
6430                 return ret;
6431
6432         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6433         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6434         return 0;
6435 }
6436
6437 static void free_device_extent_record(struct cache_extent *cache)
6438 {
6439         struct device_extent_record *rec;
6440
6441         rec = container_of(cache, struct device_extent_record, cache);
6442         if (!list_empty(&rec->chunk_list))
6443                 list_del_init(&rec->chunk_list);
6444         if (!list_empty(&rec->device_list))
6445                 list_del_init(&rec->device_list);
6446         free(rec);
6447 }
6448
6449 void free_device_extent_tree(struct device_extent_tree *tree)
6450 {
6451         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6452 }
6453
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * The key's objectid is used as the extent bytenr and its offset as the
 * parent.  A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is
 * recorded as a tree backref, anything else as a data backref carrying the
 * item's ref count.
 *
 * Returns whatever add_tree_backref() / add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6474
6475 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6476                                             struct btrfs_key *key,
6477                                             int slot)
6478 {
6479         struct btrfs_chunk *ptr;
6480         struct chunk_record *rec;
6481         int num_stripes, i;
6482
6483         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6484         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6485
6486         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6487         if (!rec) {
6488                 fprintf(stderr, "memory allocation failed\n");
6489                 exit(-1);
6490         }
6491
6492         INIT_LIST_HEAD(&rec->list);
6493         INIT_LIST_HEAD(&rec->dextents);
6494         rec->bg_rec = NULL;
6495
6496         rec->cache.start = key->offset;
6497         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6498
6499         rec->generation = btrfs_header_generation(leaf);
6500
6501         rec->objectid = key->objectid;
6502         rec->type = key->type;
6503         rec->offset = key->offset;
6504
6505         rec->length = rec->cache.size;
6506         rec->owner = btrfs_chunk_owner(leaf, ptr);
6507         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6508         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6509         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6510         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6511         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6512         rec->num_stripes = num_stripes;
6513         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6514
6515         for (i = 0; i < rec->num_stripes; ++i) {
6516                 rec->stripes[i].devid =
6517                         btrfs_stripe_devid_nr(leaf, ptr, i);
6518                 rec->stripes[i].offset =
6519                         btrfs_stripe_offset_nr(leaf, ptr, i);
6520                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6521                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6522                                 BTRFS_UUID_SIZE);
6523         }
6524
6525         return rec;
6526 }
6527
6528 static int process_chunk_item(struct cache_tree *chunk_cache,
6529                               struct btrfs_key *key, struct extent_buffer *eb,
6530                               int slot)
6531 {
6532         struct chunk_record *rec;
6533         struct btrfs_chunk *chunk;
6534         int ret = 0;
6535
6536         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6537         /*
6538          * Do extra check for this chunk item,
6539          *
6540          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6541          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6542          * and owner<->key_type check.
6543          */
6544         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6545                                       key->offset);
6546         if (ret < 0) {
6547                 error("chunk(%llu, %llu) is not valid, ignore it",
6548                       key->offset, btrfs_chunk_length(eb, chunk));
6549                 return 0;
6550         }
6551         rec = btrfs_new_chunk_record(eb, key, slot);
6552         ret = insert_cache_extent(chunk_cache, &rec->cache);
6553         if (ret) {
6554                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6555                         rec->offset, rec->length);
6556                 free(rec);
6557         }
6558
6559         return ret;
6560 }
6561
6562 static int process_device_item(struct rb_root *dev_cache,
6563                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6564 {
6565         struct btrfs_dev_item *ptr;
6566         struct device_record *rec;
6567         int ret = 0;
6568
6569         ptr = btrfs_item_ptr(eb,
6570                 slot, struct btrfs_dev_item);
6571
6572         rec = malloc(sizeof(*rec));
6573         if (!rec) {
6574                 fprintf(stderr, "memory allocation failed\n");
6575                 return -ENOMEM;
6576         }
6577
6578         rec->devid = key->offset;
6579         rec->generation = btrfs_header_generation(eb);
6580
6581         rec->objectid = key->objectid;
6582         rec->type = key->type;
6583         rec->offset = key->offset;
6584
6585         rec->devid = btrfs_device_id(eb, ptr);
6586         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6587         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6588
6589         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6590         if (ret) {
6591                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6592                 free(rec);
6593         }
6594
6595         return ret;
6596 }
6597
6598 struct block_group_record *
6599 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6600                              int slot)
6601 {
6602         struct btrfs_block_group_item *ptr;
6603         struct block_group_record *rec;
6604
6605         rec = calloc(1, sizeof(*rec));
6606         if (!rec) {
6607                 fprintf(stderr, "memory allocation failed\n");
6608                 exit(-1);
6609         }
6610
6611         rec->cache.start = key->objectid;
6612         rec->cache.size = key->offset;
6613
6614         rec->generation = btrfs_header_generation(leaf);
6615
6616         rec->objectid = key->objectid;
6617         rec->type = key->type;
6618         rec->offset = key->offset;
6619
6620         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6621         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6622
6623         INIT_LIST_HEAD(&rec->list);
6624
6625         return rec;
6626 }
6627
6628 static int process_block_group_item(struct block_group_tree *block_group_cache,
6629                                     struct btrfs_key *key,
6630                                     struct extent_buffer *eb, int slot)
6631 {
6632         struct block_group_record *rec;
6633         int ret = 0;
6634
6635         rec = btrfs_new_block_group_record(eb, key, slot);
6636         ret = insert_block_group_record(block_group_cache, rec);
6637         if (ret) {
6638                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6639                         rec->objectid, rec->offset);
6640                 free(rec);
6641         }
6642
6643         return ret;
6644 }
6645
6646 struct device_extent_record *
6647 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6648                                struct btrfs_key *key, int slot)
6649 {
6650         struct device_extent_record *rec;
6651         struct btrfs_dev_extent *ptr;
6652
6653         rec = calloc(1, sizeof(*rec));
6654         if (!rec) {
6655                 fprintf(stderr, "memory allocation failed\n");
6656                 exit(-1);
6657         }
6658
6659         rec->cache.objectid = key->objectid;
6660         rec->cache.start = key->offset;
6661
6662         rec->generation = btrfs_header_generation(leaf);
6663
6664         rec->objectid = key->objectid;
6665         rec->type = key->type;
6666         rec->offset = key->offset;
6667
6668         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6669         rec->chunk_objecteid =
6670                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6671         rec->chunk_offset =
6672                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6673         rec->length = btrfs_dev_extent_length(leaf, ptr);
6674         rec->cache.size = rec->length;
6675
6676         INIT_LIST_HEAD(&rec->chunk_list);
6677         INIT_LIST_HEAD(&rec->device_list);
6678
6679         return rec;
6680 }
6681
6682 static int
6683 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6684                            struct btrfs_key *key, struct extent_buffer *eb,
6685                            int slot)
6686 {
6687         struct device_extent_record *rec;
6688         int ret;
6689
6690         rec = btrfs_new_device_extent_record(eb, key, slot);
6691         ret = insert_device_extent_record(dev_extent_cache, rec);
6692         if (ret) {
6693                 fprintf(stderr,
6694                         "Device extent[%llu, %llu, %llu] existed.\n",
6695                         rec->objectid, rec->offset, rec->length);
6696                 free(rec);
6697         }
6698
6699         return ret;
6700 }
6701
6702 static int process_extent_item(struct btrfs_root *root,
6703                                struct cache_tree *extent_cache,
6704                                struct extent_buffer *eb, int slot)
6705 {
6706         struct btrfs_extent_item *ei;
6707         struct btrfs_extent_inline_ref *iref;
6708         struct btrfs_extent_data_ref *dref;
6709         struct btrfs_shared_data_ref *sref;
6710         struct btrfs_key key;
6711         struct extent_record tmpl;
6712         unsigned long end;
6713         unsigned long ptr;
6714         int ret;
6715         int type;
6716         u32 item_size = btrfs_item_size_nr(eb, slot);
6717         u64 refs = 0;
6718         u64 offset;
6719         u64 num_bytes;
6720         int metadata = 0;
6721
6722         btrfs_item_key_to_cpu(eb, &key, slot);
6723
6724         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6725                 metadata = 1;
6726                 num_bytes = root->nodesize;
6727         } else {
6728                 num_bytes = key.offset;
6729         }
6730
6731         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6732                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6733                       key.objectid, root->sectorsize);
6734                 return -EIO;
6735         }
6736         if (item_size < sizeof(*ei)) {
6737 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6738                 struct btrfs_extent_item_v0 *ei0;
6739                 BUG_ON(item_size != sizeof(*ei0));
6740                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6741                 refs = btrfs_extent_refs_v0(eb, ei0);
6742 #else
6743                 BUG();
6744 #endif
6745                 memset(&tmpl, 0, sizeof(tmpl));
6746                 tmpl.start = key.objectid;
6747                 tmpl.nr = num_bytes;
6748                 tmpl.extent_item_refs = refs;
6749                 tmpl.metadata = metadata;
6750                 tmpl.found_rec = 1;
6751                 tmpl.max_size = num_bytes;
6752
6753                 return add_extent_rec(extent_cache, &tmpl);
6754         }
6755
6756         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6757         refs = btrfs_extent_refs(eb, ei);
6758         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6759                 metadata = 1;
6760         else
6761                 metadata = 0;
6762         if (metadata && num_bytes != root->nodesize) {
6763                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6764                       num_bytes, root->nodesize);
6765                 return -EIO;
6766         }
6767         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6768                 error("ignore invalid data extent, length %llu is not aligned to %u",
6769                       num_bytes, root->sectorsize);
6770                 return -EIO;
6771         }
6772
6773         memset(&tmpl, 0, sizeof(tmpl));
6774         tmpl.start = key.objectid;
6775         tmpl.nr = num_bytes;
6776         tmpl.extent_item_refs = refs;
6777         tmpl.metadata = metadata;
6778         tmpl.found_rec = 1;
6779         tmpl.max_size = num_bytes;
6780         add_extent_rec(extent_cache, &tmpl);
6781
6782         ptr = (unsigned long)(ei + 1);
6783         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6784             key.type == BTRFS_EXTENT_ITEM_KEY)
6785                 ptr += sizeof(struct btrfs_tree_block_info);
6786
6787         end = (unsigned long)ei + item_size;
6788         while (ptr < end) {
6789                 iref = (struct btrfs_extent_inline_ref *)ptr;
6790                 type = btrfs_extent_inline_ref_type(eb, iref);
6791                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6792                 switch (type) {
6793                 case BTRFS_TREE_BLOCK_REF_KEY:
6794                         ret = add_tree_backref(extent_cache, key.objectid,
6795                                         0, offset, 0);
6796                         if (ret < 0)
6797                                 error("add_tree_backref failed: %s",
6798                                       strerror(-ret));
6799                         break;
6800                 case BTRFS_SHARED_BLOCK_REF_KEY:
6801                         ret = add_tree_backref(extent_cache, key.objectid,
6802                                         offset, 0, 0);
6803                         if (ret < 0)
6804                                 error("add_tree_backref failed: %s",
6805                                       strerror(-ret));
6806                         break;
6807                 case BTRFS_EXTENT_DATA_REF_KEY:
6808                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6809                         add_data_backref(extent_cache, key.objectid, 0,
6810                                         btrfs_extent_data_ref_root(eb, dref),
6811                                         btrfs_extent_data_ref_objectid(eb,
6812                                                                        dref),
6813                                         btrfs_extent_data_ref_offset(eb, dref),
6814                                         btrfs_extent_data_ref_count(eb, dref),
6815                                         0, num_bytes);
6816                         break;
6817                 case BTRFS_SHARED_DATA_REF_KEY:
6818                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6819                         add_data_backref(extent_cache, key.objectid, offset,
6820                                         0, 0, 0,
6821                                         btrfs_shared_data_ref_count(eb, sref),
6822                                         0, num_bytes);
6823                         break;
6824                 default:
6825                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6826                                 key.objectid, key.type, num_bytes);
6827                         goto out;
6828                 }
6829                 ptr += btrfs_extent_inline_ref_size(type);
6830         }
6831         WARN_ON(ptr > end);
6832 out:
6833         return 0;
6834 }
6835
6836 static int check_cache_range(struct btrfs_root *root,
6837                              struct btrfs_block_group_cache *cache,
6838                              u64 offset, u64 bytes)
6839 {
6840         struct btrfs_free_space *entry;
6841         u64 *logical;
6842         u64 bytenr;
6843         int stripe_len;
6844         int i, nr, ret;
6845
6846         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6847                 bytenr = btrfs_sb_offset(i);
6848                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6849                                        cache->key.objectid, bytenr, 0,
6850                                        &logical, &nr, &stripe_len);
6851                 if (ret)
6852                         return ret;
6853
6854                 while (nr--) {
6855                         if (logical[nr] + stripe_len <= offset)
6856                                 continue;
6857                         if (offset + bytes <= logical[nr])
6858                                 continue;
6859                         if (logical[nr] == offset) {
6860                                 if (stripe_len >= bytes) {
6861                                         free(logical);
6862                                         return 0;
6863                                 }
6864                                 bytes -= stripe_len;
6865                                 offset += stripe_len;
6866                         } else if (logical[nr] < offset) {
6867                                 if (logical[nr] + stripe_len >=
6868                                     offset + bytes) {
6869                                         free(logical);
6870                                         return 0;
6871                                 }
6872                                 bytes = (offset + bytes) -
6873                                         (logical[nr] + stripe_len);
6874                                 offset = logical[nr] + stripe_len;
6875                         } else {
6876                                 /*
6877                                  * Could be tricky, the super may land in the
6878                                  * middle of the area we're checking.  First
6879                                  * check the easiest case, it's at the end.
6880                                  */
6881                                 if (logical[nr] + stripe_len >=
6882                                     bytes + offset) {
6883                                         bytes = logical[nr] - offset;
6884                                         continue;
6885                                 }
6886
6887                                 /* Check the left side */
6888                                 ret = check_cache_range(root, cache,
6889                                                         offset,
6890                                                         logical[nr] - offset);
6891                                 if (ret) {
6892                                         free(logical);
6893                                         return ret;
6894                                 }
6895
6896                                 /* Now we continue with the right side */
6897                                 bytes = (offset + bytes) -
6898                                         (logical[nr] + stripe_len);
6899                                 offset = logical[nr] + stripe_len;
6900                         }
6901                 }
6902
6903                 free(logical);
6904         }
6905
6906         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6907         if (!entry) {
6908                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6909                         offset, offset+bytes);
6910                 return -EINVAL;
6911         }
6912
6913         if (entry->offset != offset) {
6914                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6915                         entry->offset);
6916                 return -EINVAL;
6917         }
6918
6919         if (entry->bytes != bytes) {
6920                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6921                         bytes, entry->bytes, offset);
6922                 return -EINVAL;
6923         }
6924
6925         unlink_free_space(cache->free_space_ctl, entry);
6926         free(entry);
6927         return 0;
6928 }
6929
6930 static int verify_space_cache(struct btrfs_root *root,
6931                               struct btrfs_block_group_cache *cache)
6932 {
6933         struct btrfs_path path;
6934         struct extent_buffer *leaf;
6935         struct btrfs_key key;
6936         u64 last;
6937         int ret = 0;
6938
6939         root = root->fs_info->extent_root;
6940
6941         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6942
6943         btrfs_init_path(&path);
6944         key.objectid = last;
6945         key.offset = 0;
6946         key.type = BTRFS_EXTENT_ITEM_KEY;
6947         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6948         if (ret < 0)
6949                 goto out;
6950         ret = 0;
6951         while (1) {
6952                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6953                         ret = btrfs_next_leaf(root, &path);
6954                         if (ret < 0)
6955                                 goto out;
6956                         if (ret > 0) {
6957                                 ret = 0;
6958                                 break;
6959                         }
6960                 }
6961                 leaf = path.nodes[0];
6962                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6963                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6964                         break;
6965                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6966                     key.type != BTRFS_METADATA_ITEM_KEY) {
6967                         path.slots[0]++;
6968                         continue;
6969                 }
6970
6971                 if (last == key.objectid) {
6972                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6973                                 last = key.objectid + key.offset;
6974                         else
6975                                 last = key.objectid + root->nodesize;
6976                         path.slots[0]++;
6977                         continue;
6978                 }
6979
6980                 ret = check_cache_range(root, cache, last,
6981                                         key.objectid - last);
6982                 if (ret)
6983                         break;
6984                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6985                         last = key.objectid + key.offset;
6986                 else
6987                         last = key.objectid + root->nodesize;
6988                 path.slots[0]++;
6989         }
6990
6991         if (last < cache->key.objectid + cache->key.offset)
6992                 ret = check_cache_range(root, cache, last,
6993                                         cache->key.objectid +
6994                                         cache->key.offset - last);
6995
6996 out:
6997         btrfs_release_path(&path);
6998
6999         if (!ret &&
7000             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7001                 fprintf(stderr, "There are still entries left in the space "
7002                         "cache\n");
7003                 ret = -EINVAL;
7004         }
7005
7006         return ret;
7007 }
7008
7009 static int check_space_cache(struct btrfs_root *root)
7010 {
7011         struct btrfs_block_group_cache *cache;
7012         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7013         int ret;
7014         int error = 0;
7015
7016         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7017             btrfs_super_generation(root->fs_info->super_copy) !=
7018             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7019                 printf("cache and super generation don't match, space cache "
7020                        "will be invalidated\n");
7021                 return 0;
7022         }
7023
7024         if (ctx.progress_enabled) {
7025                 ctx.tp = TASK_FREE_SPACE;
7026                 task_start(ctx.info);
7027         }
7028
7029         while (1) {
7030                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7031                 if (!cache)
7032                         break;
7033
7034                 start = cache->key.objectid + cache->key.offset;
7035                 if (!cache->free_space_ctl) {
7036                         if (btrfs_init_free_space_ctl(cache,
7037                                                       root->sectorsize)) {
7038                                 ret = -ENOMEM;
7039                                 break;
7040                         }
7041                 } else {
7042                         btrfs_remove_free_space_cache(cache);
7043                 }
7044
7045                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7046                         ret = exclude_super_stripes(root, cache);
7047                         if (ret) {
7048                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7049                                         strerror(-ret));
7050                                 error++;
7051                                 continue;
7052                         }
7053                         ret = load_free_space_tree(root->fs_info, cache);
7054                         free_excluded_extents(root, cache);
7055                         if (ret < 0) {
7056                                 fprintf(stderr, "could not load free space tree: %s\n",
7057                                         strerror(-ret));
7058                                 error++;
7059                                 continue;
7060                         }
7061                         error += ret;
7062                 } else {
7063                         ret = load_free_space_cache(root->fs_info, cache);
7064                         if (!ret)
7065                                 continue;
7066                 }
7067
7068                 ret = verify_space_cache(root, cache);
7069                 if (ret) {
7070                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7071                                 cache->key.objectid);
7072                         error++;
7073                 }
7074         }
7075
7076         task_stop(ctx.info);
7077
7078         return error ? -EINVAL : 0;
7079 }
7080
7081 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7082                         u64 num_bytes, unsigned long leaf_offset,
7083                         struct extent_buffer *eb) {
7084
7085         u64 offset = 0;
7086         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7087         char *data;
7088         unsigned long csum_offset;
7089         u32 csum;
7090         u32 csum_expected;
7091         u64 read_len;
7092         u64 data_checked = 0;
7093         u64 tmp;
7094         int ret = 0;
7095         int mirror;
7096         int num_copies;
7097
7098         if (num_bytes % root->sectorsize)
7099                 return -EINVAL;
7100
7101         data = malloc(num_bytes);
7102         if (!data)
7103                 return -ENOMEM;
7104
7105         while (offset < num_bytes) {
7106                 mirror = 0;
7107 again:
7108                 read_len = num_bytes - offset;
7109                 /* read as much space once a time */
7110                 ret = read_extent_data(root, data + offset,
7111                                 bytenr + offset, &read_len, mirror);
7112                 if (ret)
7113                         goto out;
7114                 data_checked = 0;
7115                 /* verify every 4k data's checksum */
7116                 while (data_checked < read_len) {
7117                         csum = ~(u32)0;
7118                         tmp = offset + data_checked;
7119
7120                         csum = btrfs_csum_data((char *)data + tmp,
7121                                                csum, root->sectorsize);
7122                         btrfs_csum_final(csum, (u8 *)&csum);
7123
7124                         csum_offset = leaf_offset +
7125                                  tmp / root->sectorsize * csum_size;
7126                         read_extent_buffer(eb, (char *)&csum_expected,
7127                                            csum_offset, csum_size);
7128                         /* try another mirror */
7129                         if (csum != csum_expected) {
7130                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7131                                                 mirror, bytenr + tmp,
7132                                                 csum, csum_expected);
7133                                 num_copies = btrfs_num_copies(
7134                                                 &root->fs_info->mapping_tree,
7135                                                 bytenr, num_bytes);
7136                                 if (mirror < num_copies - 1) {
7137                                         mirror += 1;
7138                                         goto again;
7139                                 }
7140                         }
7141                         data_checked += root->sectorsize;
7142                 }
7143                 offset += read_len;
7144         }
7145 out:
7146         free(data);
7147         return ret;
7148 }
7149
7150 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7151                                u64 num_bytes)
7152 {
7153         struct btrfs_path path;
7154         struct extent_buffer *leaf;
7155         struct btrfs_key key;
7156         int ret;
7157
7158         btrfs_init_path(&path);
7159         key.objectid = bytenr;
7160         key.type = BTRFS_EXTENT_ITEM_KEY;
7161         key.offset = (u64)-1;
7162
7163 again:
7164         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7165                                 0, 0);
7166         if (ret < 0) {
7167                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7168                 btrfs_release_path(&path);
7169                 return ret;
7170         } else if (ret) {
7171                 if (path.slots[0] > 0) {
7172                         path.slots[0]--;
7173                 } else {
7174                         ret = btrfs_prev_leaf(root, &path);
7175                         if (ret < 0) {
7176                                 goto out;
7177                         } else if (ret > 0) {
7178                                 ret = 0;
7179                                 goto out;
7180                         }
7181                 }
7182         }
7183
7184         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7185
7186         /*
7187          * Block group items come before extent items if they have the same
7188          * bytenr, so walk back one more just in case.  Dear future traveller,
7189          * first congrats on mastering time travel.  Now if it's not too much
7190          * trouble could you go back to 2006 and tell Chris to make the
7191          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7192          * EXTENT_ITEM_KEY please?
7193          */
7194         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7195                 if (path.slots[0] > 0) {
7196                         path.slots[0]--;
7197                 } else {
7198                         ret = btrfs_prev_leaf(root, &path);
7199                         if (ret < 0) {
7200                                 goto out;
7201                         } else if (ret > 0) {
7202                                 ret = 0;
7203                                 goto out;
7204                         }
7205                 }
7206                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7207         }
7208
7209         while (num_bytes) {
7210                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7211                         ret = btrfs_next_leaf(root, &path);
7212                         if (ret < 0) {
7213                                 fprintf(stderr, "Error going to next leaf "
7214                                         "%d\n", ret);
7215                                 btrfs_release_path(&path);
7216                                 return ret;
7217                         } else if (ret) {
7218                                 break;
7219                         }
7220                 }
7221                 leaf = path.nodes[0];
7222                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7223                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7224                         path.slots[0]++;
7225                         continue;
7226                 }
7227                 if (key.objectid + key.offset < bytenr) {
7228                         path.slots[0]++;
7229                         continue;
7230                 }
7231                 if (key.objectid > bytenr + num_bytes)
7232                         break;
7233
7234                 if (key.objectid == bytenr) {
7235                         if (key.offset >= num_bytes) {
7236                                 num_bytes = 0;
7237                                 break;
7238                         }
7239                         num_bytes -= key.offset;
7240                         bytenr += key.offset;
7241                 } else if (key.objectid < bytenr) {
7242                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7243                                 num_bytes = 0;
7244                                 break;
7245                         }
7246                         num_bytes = (bytenr + num_bytes) -
7247                                 (key.objectid + key.offset);
7248                         bytenr = key.objectid + key.offset;
7249                 } else {
7250                         if (key.objectid + key.offset < bytenr + num_bytes) {
7251                                 u64 new_start = key.objectid + key.offset;
7252                                 u64 new_bytes = bytenr + num_bytes - new_start;
7253
7254                                 /*
7255                                  * Weird case, the extent is in the middle of
7256                                  * our range, we'll have to search one side
7257                                  * and then the other.  Not sure if this happens
7258                                  * in real life, but no harm in coding it up
7259                                  * anyway just in case.
7260                                  */
7261                                 btrfs_release_path(&path);
7262                                 ret = check_extent_exists(root, new_start,
7263                                                           new_bytes);
7264                                 if (ret) {
7265                                         fprintf(stderr, "Right section didn't "
7266                                                 "have a record\n");
7267                                         break;
7268                                 }
7269                                 num_bytes = key.objectid - bytenr;
7270                                 goto again;
7271                         }
7272                         num_bytes = key.objectid - bytenr;
7273                 }
7274                 path.slots[0]++;
7275         }
7276         ret = 0;
7277
7278 out:
7279         if (num_bytes && !ret) {
7280                 fprintf(stderr, "There are no extents for csum range "
7281                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7282                 ret = 1;
7283         }
7284
7285         btrfs_release_path(&path);
7286         return ret;
7287 }
7288
7289 static int check_csums(struct btrfs_root *root)
7290 {
7291         struct btrfs_path path;
7292         struct extent_buffer *leaf;
7293         struct btrfs_key key;
7294         u64 offset = 0, num_bytes = 0;
7295         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7296         int errors = 0;
7297         int ret;
7298         u64 data_len;
7299         unsigned long leaf_offset;
7300
7301         root = root->fs_info->csum_root;
7302         if (!extent_buffer_uptodate(root->node)) {
7303                 fprintf(stderr, "No valid csum tree found\n");
7304                 return -ENOENT;
7305         }
7306
7307         btrfs_init_path(&path);
7308         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7309         key.type = BTRFS_EXTENT_CSUM_KEY;
7310         key.offset = 0;
7311         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7312         if (ret < 0) {
7313                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7314                 btrfs_release_path(&path);
7315                 return ret;
7316         }
7317
7318         if (ret > 0 && path.slots[0])
7319                 path.slots[0]--;
7320         ret = 0;
7321
7322         while (1) {
7323                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7324                         ret = btrfs_next_leaf(root, &path);
7325                         if (ret < 0) {
7326                                 fprintf(stderr, "Error going to next leaf "
7327                                         "%d\n", ret);
7328                                 break;
7329                         }
7330                         if (ret)
7331                                 break;
7332                 }
7333                 leaf = path.nodes[0];
7334
7335                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7336                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7337                         path.slots[0]++;
7338                         continue;
7339                 }
7340
7341                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7342                               csum_size) * root->sectorsize;
7343                 if (!check_data_csum)
7344                         goto skip_csum_check;
7345                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7346                 ret = check_extent_csums(root, key.offset, data_len,
7347                                          leaf_offset, leaf);
7348                 if (ret)
7349                         break;
7350 skip_csum_check:
7351                 if (!num_bytes) {
7352                         offset = key.offset;
7353                 } else if (key.offset != offset + num_bytes) {
7354                         ret = check_extent_exists(root, offset, num_bytes);
7355                         if (ret) {
7356                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7357                                         "there is no extent record\n",
7358                                         offset, offset+num_bytes);
7359                                 errors++;
7360                         }
7361                         offset = key.offset;
7362                         num_bytes = 0;
7363                 }
7364                 num_bytes += data_len;
7365                 path.slots[0]++;
7366         }
7367
7368         btrfs_release_path(&path);
7369         return errors;
7370 }
7371
7372 static int is_dropped_key(struct btrfs_key *key,
7373                           struct btrfs_key *drop_key) {
7374         if (key->objectid < drop_key->objectid)
7375                 return 1;
7376         else if (key->objectid == drop_key->objectid) {
7377                 if (key->type < drop_key->type)
7378                         return 1;
7379                 else if (key->type == drop_key->type) {
7380                         if (key->offset < drop_key->offset)
7381                                 return 1;
7382                 }
7383         }
7384         return 0;
7385 }
7386
7387 /*
7388  * Here are the rules for FULL_BACKREF.
7389  *
7390  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7391  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7392  *      FULL_BACKREF set.
7393  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7394  *    if it happened after the relocation occurred since we'll have dropped the
7395  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7396  *    have no real way to know for sure.
7397  *
7398  * We process the blocks one root at a time, and we start from the lowest root
7399  * objectid and go to the highest.  So we can just lookup the owner backref for
7400  * the record and if we don't find it then we know it doesn't exist and we have
7401  * a FULL BACKREF.
7402  *
7403  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7404  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7405  * be set or not and then we can check later once we've gathered all the refs.
7406  */
7407 static int calc_extent_flag(struct btrfs_root *root,
7408                            struct cache_tree *extent_cache,
7409                            struct extent_buffer *buf,
7410                            struct root_item_record *ri,
7411                            u64 *flags)
7412 {
7413         struct extent_record *rec;
7414         struct cache_extent *cache;
7415         struct tree_backref *tback;
7416         u64 owner = 0;
7417
7418         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7419         /* we have added this extent before */
7420         if (!cache)
7421                 return -ENOENT;
7422
7423         rec = container_of(cache, struct extent_record, cache);
7424
7425         /*
7426          * Except file/reloc tree, we can not have
7427          * FULL BACKREF MODE
7428          */
7429         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7430                 goto normal;
7431         /*
7432          * root node
7433          */
7434         if (buf->start == ri->bytenr)
7435                 goto normal;
7436
7437         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7438                 goto full_backref;
7439
7440         owner = btrfs_header_owner(buf);
7441         if (owner == ri->objectid)
7442                 goto normal;
7443
7444         tback = find_tree_backref(rec, 0, owner);
7445         if (!tback)
7446                 goto full_backref;
7447 normal:
7448         *flags = 0;
7449         if (rec->flag_block_full_backref != FLAG_UNSET &&
7450             rec->flag_block_full_backref != 0)
7451                 rec->bad_full_backref = 1;
7452         return 0;
7453 full_backref:
7454         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7455         if (rec->flag_block_full_backref != FLAG_UNSET &&
7456             rec->flag_block_full_backref != 1)
7457                 rec->bad_full_backref = 1;
7458         return 0;
7459 }
7460
7461 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7462 {
7463         fprintf(stderr, "Invalid key type(");
7464         print_key_type(stderr, 0, key_type);
7465         fprintf(stderr, ") found in root(");
7466         print_objectid(stderr, rootid, 0);
7467         fprintf(stderr, ")\n");
7468 }
7469
7470 /*
7471  * Check if the key is valid with its extent buffer.
7472  *
7473  * This is a early check in case invalid key exists in a extent buffer
7474  * This is not comprehensive yet, but should prevent wrong key/item passed
7475  * further
7476  */
7477 static int check_type_with_root(u64 rootid, u8 key_type)
7478 {
7479         switch (key_type) {
7480         /* Only valid in chunk tree */
7481         case BTRFS_DEV_ITEM_KEY:
7482         case BTRFS_CHUNK_ITEM_KEY:
7483                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7484                         goto err;
7485                 break;
7486         /* valid in csum and log tree */
7487         case BTRFS_CSUM_TREE_OBJECTID:
7488                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7489                       is_fstree(rootid)))
7490                         goto err;
7491                 break;
7492         case BTRFS_EXTENT_ITEM_KEY:
7493         case BTRFS_METADATA_ITEM_KEY:
7494         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7495                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7496                         goto err;
7497                 break;
7498         case BTRFS_ROOT_ITEM_KEY:
7499                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7500                         goto err;
7501                 break;
7502         case BTRFS_DEV_EXTENT_KEY:
7503                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7504                         goto err;
7505                 break;
7506         }
7507         return 0;
7508 err:
7509         report_mismatch_key_root(key_type, rootid);
7510         return -EINVAL;
7511 }
7512
7513 static int run_next_block(struct btrfs_root *root,
7514                           struct block_info *bits,
7515                           int bits_nr,
7516                           u64 *last,
7517                           struct cache_tree *pending,
7518                           struct cache_tree *seen,
7519                           struct cache_tree *reada,
7520                           struct cache_tree *nodes,
7521                           struct cache_tree *extent_cache,
7522                           struct cache_tree *chunk_cache,
7523                           struct rb_root *dev_cache,
7524                           struct block_group_tree *block_group_cache,
7525                           struct device_extent_tree *dev_extent_cache,
7526                           struct root_item_record *ri)
7527 {
7528         struct extent_buffer *buf;
7529         struct extent_record *rec = NULL;
7530         u64 bytenr;
7531         u32 size;
7532         u64 parent;
7533         u64 owner;
7534         u64 flags;
7535         u64 ptr;
7536         u64 gen = 0;
7537         int ret = 0;
7538         int i;
7539         int nritems;
7540         struct btrfs_key key;
7541         struct cache_extent *cache;
7542         int reada_bits;
7543
7544         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7545                                     bits_nr, &reada_bits);
7546         if (nritems == 0)
7547                 return 1;
7548
7549         if (!reada_bits) {
7550                 for(i = 0; i < nritems; i++) {
7551                         ret = add_cache_extent(reada, bits[i].start,
7552                                                bits[i].size);
7553                         if (ret == -EEXIST)
7554                                 continue;
7555
7556                         /* fixme, get the parent transid */
7557                         readahead_tree_block(root, bits[i].start,
7558                                              bits[i].size, 0);
7559                 }
7560         }
7561         *last = bits[0].start;
7562         bytenr = bits[0].start;
7563         size = bits[0].size;
7564
7565         cache = lookup_cache_extent(pending, bytenr, size);
7566         if (cache) {
7567                 remove_cache_extent(pending, cache);
7568                 free(cache);
7569         }
7570         cache = lookup_cache_extent(reada, bytenr, size);
7571         if (cache) {
7572                 remove_cache_extent(reada, cache);
7573                 free(cache);
7574         }
7575         cache = lookup_cache_extent(nodes, bytenr, size);
7576         if (cache) {
7577                 remove_cache_extent(nodes, cache);
7578                 free(cache);
7579         }
7580         cache = lookup_cache_extent(extent_cache, bytenr, size);
7581         if (cache) {
7582                 rec = container_of(cache, struct extent_record, cache);
7583                 gen = rec->parent_generation;
7584         }
7585
7586         /* fixme, get the real parent transid */
7587         buf = read_tree_block(root, bytenr, size, gen);
7588         if (!extent_buffer_uptodate(buf)) {
7589                 record_bad_block_io(root->fs_info,
7590                                     extent_cache, bytenr, size);
7591                 goto out;
7592         }
7593
7594         nritems = btrfs_header_nritems(buf);
7595
7596         flags = 0;
7597         if (!init_extent_tree) {
7598                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7599                                        btrfs_header_level(buf), 1, NULL,
7600                                        &flags);
7601                 if (ret < 0) {
7602                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7603                         if (ret < 0) {
7604                                 fprintf(stderr, "Couldn't calc extent flags\n");
7605                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7606                         }
7607                 }
7608         } else {
7609                 flags = 0;
7610                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7611                 if (ret < 0) {
7612                         fprintf(stderr, "Couldn't calc extent flags\n");
7613                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7614                 }
7615         }
7616
7617         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7618                 if (ri != NULL &&
7619                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7620                     ri->objectid == btrfs_header_owner(buf)) {
7621                         /*
7622                          * Ok we got to this block from it's original owner and
7623                          * we have FULL_BACKREF set.  Relocation can leave
7624                          * converted blocks over so this is altogether possible,
7625                          * however it's not possible if the generation > the
7626                          * last snapshot, so check for this case.
7627                          */
7628                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7629                             btrfs_header_generation(buf) > ri->last_snapshot) {
7630                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7631                                 rec->bad_full_backref = 1;
7632                         }
7633                 }
7634         } else {
7635                 if (ri != NULL &&
7636                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7637                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7638                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7639                         rec->bad_full_backref = 1;
7640                 }
7641         }
7642
7643         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7644                 rec->flag_block_full_backref = 1;
7645                 parent = bytenr;
7646                 owner = 0;
7647         } else {
7648                 rec->flag_block_full_backref = 0;
7649                 parent = 0;
7650                 owner = btrfs_header_owner(buf);
7651         }
7652
7653         ret = check_block(root, extent_cache, buf, flags);
7654         if (ret)
7655                 goto out;
7656
7657         if (btrfs_is_leaf(buf)) {
7658                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7659                 for (i = 0; i < nritems; i++) {
7660                         struct btrfs_file_extent_item *fi;
7661                         btrfs_item_key_to_cpu(buf, &key, i);
7662                         /*
7663                          * Check key type against the leaf owner.
7664                          * Could filter quite a lot of early error if
7665                          * owner is correct
7666                          */
7667                         if (check_type_with_root(btrfs_header_owner(buf),
7668                                                  key.type)) {
7669                                 fprintf(stderr, "ignoring invalid key\n");
7670                                 continue;
7671                         }
7672                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7673                                 process_extent_item(root, extent_cache, buf,
7674                                                     i);
7675                                 continue;
7676                         }
7677                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7678                                 process_extent_item(root, extent_cache, buf,
7679                                                     i);
7680                                 continue;
7681                         }
7682                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7683                                 total_csum_bytes +=
7684                                         btrfs_item_size_nr(buf, i);
7685                                 continue;
7686                         }
7687                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7688                                 process_chunk_item(chunk_cache, &key, buf, i);
7689                                 continue;
7690                         }
7691                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7692                                 process_device_item(dev_cache, &key, buf, i);
7693                                 continue;
7694                         }
7695                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7696                                 process_block_group_item(block_group_cache,
7697                                         &key, buf, i);
7698                                 continue;
7699                         }
7700                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7701                                 process_device_extent_item(dev_extent_cache,
7702                                         &key, buf, i);
7703                                 continue;
7704
7705                         }
7706                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7707 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7708                                 process_extent_ref_v0(extent_cache, buf, i);
7709 #else
7710                                 BUG();
7711 #endif
7712                                 continue;
7713                         }
7714
7715                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7716                                 ret = add_tree_backref(extent_cache,
7717                                                 key.objectid, 0, key.offset, 0);
7718                                 if (ret < 0)
7719                                         error("add_tree_backref failed: %s",
7720                                               strerror(-ret));
7721                                 continue;
7722                         }
7723                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7724                                 ret = add_tree_backref(extent_cache,
7725                                                 key.objectid, key.offset, 0, 0);
7726                                 if (ret < 0)
7727                                         error("add_tree_backref failed: %s",
7728                                               strerror(-ret));
7729                                 continue;
7730                         }
7731                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7732                                 struct btrfs_extent_data_ref *ref;
7733                                 ref = btrfs_item_ptr(buf, i,
7734                                                 struct btrfs_extent_data_ref);
7735                                 add_data_backref(extent_cache,
7736                                         key.objectid, 0,
7737                                         btrfs_extent_data_ref_root(buf, ref),
7738                                         btrfs_extent_data_ref_objectid(buf,
7739                                                                        ref),
7740                                         btrfs_extent_data_ref_offset(buf, ref),
7741                                         btrfs_extent_data_ref_count(buf, ref),
7742                                         0, root->sectorsize);
7743                                 continue;
7744                         }
7745                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7746                                 struct btrfs_shared_data_ref *ref;
7747                                 ref = btrfs_item_ptr(buf, i,
7748                                                 struct btrfs_shared_data_ref);
7749                                 add_data_backref(extent_cache,
7750                                         key.objectid, key.offset, 0, 0, 0,
7751                                         btrfs_shared_data_ref_count(buf, ref),
7752                                         0, root->sectorsize);
7753                                 continue;
7754                         }
7755                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7756                                 struct bad_item *bad;
7757
7758                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7759                                         continue;
7760                                 if (!owner)
7761                                         continue;
7762                                 bad = malloc(sizeof(struct bad_item));
7763                                 if (!bad)
7764                                         continue;
7765                                 INIT_LIST_HEAD(&bad->list);
7766                                 memcpy(&bad->key, &key,
7767                                        sizeof(struct btrfs_key));
7768                                 bad->root_id = owner;
7769                                 list_add_tail(&bad->list, &delete_items);
7770                                 continue;
7771                         }
7772                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7773                                 continue;
7774                         fi = btrfs_item_ptr(buf, i,
7775                                             struct btrfs_file_extent_item);
7776                         if (btrfs_file_extent_type(buf, fi) ==
7777                             BTRFS_FILE_EXTENT_INLINE)
7778                                 continue;
7779                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7780                                 continue;
7781
7782                         data_bytes_allocated +=
7783                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7784                         if (data_bytes_allocated < root->sectorsize) {
7785                                 abort();
7786                         }
7787                         data_bytes_referenced +=
7788                                 btrfs_file_extent_num_bytes(buf, fi);
7789                         add_data_backref(extent_cache,
7790                                 btrfs_file_extent_disk_bytenr(buf, fi),
7791                                 parent, owner, key.objectid, key.offset -
7792                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7793                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7794                 }
7795         } else {
7796                 int level;
7797                 struct btrfs_key first_key;
7798
7799                 first_key.objectid = 0;
7800
7801                 if (nritems > 0)
7802                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7803                 level = btrfs_header_level(buf);
7804                 for (i = 0; i < nritems; i++) {
7805                         struct extent_record tmpl;
7806
7807                         ptr = btrfs_node_blockptr(buf, i);
7808                         size = root->nodesize;
7809                         btrfs_node_key_to_cpu(buf, &key, i);
7810                         if (ri != NULL) {
7811                                 if ((level == ri->drop_level)
7812                                     && is_dropped_key(&key, &ri->drop_key)) {
7813                                         continue;
7814                                 }
7815                         }
7816
7817                         memset(&tmpl, 0, sizeof(tmpl));
7818                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7819                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7820                         tmpl.start = ptr;
7821                         tmpl.nr = size;
7822                         tmpl.refs = 1;
7823                         tmpl.metadata = 1;
7824                         tmpl.max_size = size;
7825                         ret = add_extent_rec(extent_cache, &tmpl);
7826                         if (ret < 0)
7827                                 goto out;
7828
7829                         ret = add_tree_backref(extent_cache, ptr, parent,
7830                                         owner, 1);
7831                         if (ret < 0) {
7832                                 error("add_tree_backref failed: %s",
7833                                       strerror(-ret));
7834                                 continue;
7835                         }
7836
7837                         if (level > 1) {
7838                                 add_pending(nodes, seen, ptr, size);
7839                         } else {
7840                                 add_pending(pending, seen, ptr, size);
7841                         }
7842                 }
7843                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7844                                       nritems) * sizeof(struct btrfs_key_ptr);
7845         }
7846         total_btree_bytes += buf->len;
7847         if (fs_root_objectid(btrfs_header_owner(buf)))
7848                 total_fs_tree_bytes += buf->len;
7849         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7850                 total_extent_tree_bytes += buf->len;
7851         if (!found_old_backref &&
7852             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7853             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7854             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7855                 found_old_backref = 1;
7856 out:
7857         free_extent_buffer(buf);
7858         return ret;
7859 }
7860
7861 static int add_root_to_pending(struct extent_buffer *buf,
7862                                struct cache_tree *extent_cache,
7863                                struct cache_tree *pending,
7864                                struct cache_tree *seen,
7865                                struct cache_tree *nodes,
7866                                u64 objectid)
7867 {
7868         struct extent_record tmpl;
7869         int ret;
7870
7871         if (btrfs_header_level(buf) > 0)
7872                 add_pending(nodes, seen, buf->start, buf->len);
7873         else
7874                 add_pending(pending, seen, buf->start, buf->len);
7875
7876         memset(&tmpl, 0, sizeof(tmpl));
7877         tmpl.start = buf->start;
7878         tmpl.nr = buf->len;
7879         tmpl.is_root = 1;
7880         tmpl.refs = 1;
7881         tmpl.metadata = 1;
7882         tmpl.max_size = buf->len;
7883         add_extent_rec(extent_cache, &tmpl);
7884
7885         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7886             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7887                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7888                                 0, 1);
7889         else
7890                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7891                                 1);
7892         return ret;
7893 }
7894
7895 /* as we fix the tree, we might be deleting blocks that
7896  * we're tracking for repair.  This hook makes sure we
7897  * remove any backrefs for blocks as we are fixing them.
7898  */
7899 static int free_extent_hook(struct btrfs_trans_handle *trans,
7900                             struct btrfs_root *root,
7901                             u64 bytenr, u64 num_bytes, u64 parent,
7902                             u64 root_objectid, u64 owner, u64 offset,
7903                             int refs_to_drop)
7904 {
7905         struct extent_record *rec;
7906         struct cache_extent *cache;
7907         int is_data;
7908         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7909
7910         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7911         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7912         if (!cache)
7913                 return 0;
7914
7915         rec = container_of(cache, struct extent_record, cache);
7916         if (is_data) {
7917                 struct data_backref *back;
7918                 back = find_data_backref(rec, parent, root_objectid, owner,
7919                                          offset, 1, bytenr, num_bytes);
7920                 if (!back)
7921                         goto out;
7922                 if (back->node.found_ref) {
7923                         back->found_ref -= refs_to_drop;
7924                         if (rec->refs)
7925                                 rec->refs -= refs_to_drop;
7926                 }
7927                 if (back->node.found_extent_tree) {
7928                         back->num_refs -= refs_to_drop;
7929                         if (rec->extent_item_refs)
7930                                 rec->extent_item_refs -= refs_to_drop;
7931                 }
7932                 if (back->found_ref == 0)
7933                         back->node.found_ref = 0;
7934                 if (back->num_refs == 0)
7935                         back->node.found_extent_tree = 0;
7936
7937                 if (!back->node.found_extent_tree && back->node.found_ref) {
7938                         list_del(&back->node.list);
7939                         free(back);
7940                 }
7941         } else {
7942                 struct tree_backref *back;
7943                 back = find_tree_backref(rec, parent, root_objectid);
7944                 if (!back)
7945                         goto out;
7946                 if (back->node.found_ref) {
7947                         if (rec->refs)
7948                                 rec->refs--;
7949                         back->node.found_ref = 0;
7950                 }
7951                 if (back->node.found_extent_tree) {
7952                         if (rec->extent_item_refs)
7953                                 rec->extent_item_refs--;
7954                         back->node.found_extent_tree = 0;
7955                 }
7956                 if (!back->node.found_extent_tree && back->node.found_ref) {
7957                         list_del(&back->node.list);
7958                         free(back);
7959                 }
7960         }
7961         maybe_free_extent_rec(extent_cache, rec);
7962 out:
7963         return 0;
7964 }
7965
7966 static int delete_extent_records(struct btrfs_trans_handle *trans,
7967                                  struct btrfs_root *root,
7968                                  struct btrfs_path *path,
7969                                  u64 bytenr)
7970 {
7971         struct btrfs_key key;
7972         struct btrfs_key found_key;
7973         struct extent_buffer *leaf;
7974         int ret;
7975         int slot;
7976
7977
7978         key.objectid = bytenr;
7979         key.type = (u8)-1;
7980         key.offset = (u64)-1;
7981
7982         while(1) {
7983                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7984                                         &key, path, 0, 1);
7985                 if (ret < 0)
7986                         break;
7987
7988                 if (ret > 0) {
7989                         ret = 0;
7990                         if (path->slots[0] == 0)
7991                                 break;
7992                         path->slots[0]--;
7993                 }
7994                 ret = 0;
7995
7996                 leaf = path->nodes[0];
7997                 slot = path->slots[0];
7998
7999                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8000                 if (found_key.objectid != bytenr)
8001                         break;
8002
8003                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8004                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8005                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8006                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8007                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8008                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8009                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8010                         btrfs_release_path(path);
8011                         if (found_key.type == 0) {
8012                                 if (found_key.offset == 0)
8013                                         break;
8014                                 key.offset = found_key.offset - 1;
8015                                 key.type = found_key.type;
8016                         }
8017                         key.type = found_key.type - 1;
8018                         key.offset = (u64)-1;
8019                         continue;
8020                 }
8021
8022                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8023                         found_key.objectid, found_key.type, found_key.offset);
8024
8025                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8026                 if (ret)
8027                         break;
8028                 btrfs_release_path(path);
8029
8030                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8031                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8032                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8033                                 found_key.offset : root->nodesize;
8034
8035                         ret = btrfs_update_block_group(trans, root, bytenr,
8036                                                        bytes, 0, 0);
8037                         if (ret)
8038                                 break;
8039                 }
8040         }
8041
8042         btrfs_release_path(path);
8043         return ret;
8044 }
8045
8046 /*
8047  * for a single backref, this will allocate a new extent
8048  * and add the backref to it.
8049  */
8050 static int record_extent(struct btrfs_trans_handle *trans,
8051                          struct btrfs_fs_info *info,
8052                          struct btrfs_path *path,
8053                          struct extent_record *rec,
8054                          struct extent_backref *back,
8055                          int allocated, u64 flags)
8056 {
8057         int ret = 0;
8058         struct btrfs_root *extent_root = info->extent_root;
8059         struct extent_buffer *leaf;
8060         struct btrfs_key ins_key;
8061         struct btrfs_extent_item *ei;
8062         struct data_backref *dback;
8063         struct btrfs_tree_block_info *bi;
8064
8065         if (!back->is_data)
8066                 rec->max_size = max_t(u64, rec->max_size,
8067                                     info->extent_root->nodesize);
8068
8069         if (!allocated) {
8070                 u32 item_size = sizeof(*ei);
8071
8072                 if (!back->is_data)
8073                         item_size += sizeof(*bi);
8074
8075                 ins_key.objectid = rec->start;
8076                 ins_key.offset = rec->max_size;
8077                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8078
8079                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8080                                         &ins_key, item_size);
8081                 if (ret)
8082                         goto fail;
8083
8084                 leaf = path->nodes[0];
8085                 ei = btrfs_item_ptr(leaf, path->slots[0],
8086                                     struct btrfs_extent_item);
8087
8088                 btrfs_set_extent_refs(leaf, ei, 0);
8089                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8090
8091                 if (back->is_data) {
8092                         btrfs_set_extent_flags(leaf, ei,
8093                                                BTRFS_EXTENT_FLAG_DATA);
8094                 } else {
8095                         struct btrfs_disk_key copy_key;;
8096
8097                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8098                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8099                                              sizeof(*bi));
8100
8101                         btrfs_set_disk_key_objectid(&copy_key,
8102                                                     rec->info_objectid);
8103                         btrfs_set_disk_key_type(&copy_key, 0);
8104                         btrfs_set_disk_key_offset(&copy_key, 0);
8105
8106                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8107                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8108
8109                         btrfs_set_extent_flags(leaf, ei,
8110                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8111                 }
8112
8113                 btrfs_mark_buffer_dirty(leaf);
8114                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8115                                                rec->max_size, 1, 0);
8116                 if (ret)
8117                         goto fail;
8118                 btrfs_release_path(path);
8119         }
8120
8121         if (back->is_data) {
8122                 u64 parent;
8123                 int i;
8124
8125                 dback = to_data_backref(back);
8126                 if (back->full_backref)
8127                         parent = dback->parent;
8128                 else
8129                         parent = 0;
8130
8131                 for (i = 0; i < dback->found_ref; i++) {
8132                         /* if parent != 0, we're doing a full backref
8133                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8134                          * just makes the backref allocator create a data
8135                          * backref
8136                          */
8137                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8138                                                    rec->start, rec->max_size,
8139                                                    parent,
8140                                                    dback->root,
8141                                                    parent ?
8142                                                    BTRFS_FIRST_FREE_OBJECTID :
8143                                                    dback->owner,
8144                                                    dback->offset);
8145                         if (ret)
8146                                 break;
8147                 }
8148                 fprintf(stderr, "adding new data backref"
8149                                 " on %llu %s %llu owner %llu"
8150                                 " offset %llu found %d\n",
8151                                 (unsigned long long)rec->start,
8152                                 back->full_backref ?
8153                                 "parent" : "root",
8154                                 back->full_backref ?
8155                                 (unsigned long long)parent :
8156                                 (unsigned long long)dback->root,
8157                                 (unsigned long long)dback->owner,
8158                                 (unsigned long long)dback->offset,
8159                                 dback->found_ref);
8160         } else {
8161                 u64 parent;
8162                 struct tree_backref *tback;
8163
8164                 tback = to_tree_backref(back);
8165                 if (back->full_backref)
8166                         parent = tback->parent;
8167                 else
8168                         parent = 0;
8169
8170                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8171                                            rec->start, rec->max_size,
8172                                            parent, tback->root, 0, 0);
8173                 fprintf(stderr, "adding new tree backref on "
8174                         "start %llu len %llu parent %llu root %llu\n",
8175                         rec->start, rec->max_size, parent, tback->root);
8176         }
8177 fail:
8178         btrfs_release_path(path);
8179         return ret;
8180 }
8181
8182 static struct extent_entry *find_entry(struct list_head *entries,
8183                                        u64 bytenr, u64 bytes)
8184 {
8185         struct extent_entry *entry = NULL;
8186
8187         list_for_each_entry(entry, entries, list) {
8188                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8189                         return entry;
8190         }
8191
8192         return NULL;
8193 }
8194
8195 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8196 {
8197         struct extent_entry *entry, *best = NULL, *prev = NULL;
8198
8199         list_for_each_entry(entry, entries, list) {
8200                 /*
8201                  * If there are as many broken entries as entries then we know
8202                  * not to trust this particular entry.
8203                  */
8204                 if (entry->broken == entry->count)
8205                         continue;
8206
8207                 /*
8208                  * Special case, when there are only two entries and 'best' is
8209                  * the first one
8210                  */
8211                 if (!prev) {
8212                         best = entry;
8213                         prev = entry;
8214                         continue;
8215                 }
8216
8217                 /*
8218                  * If our current entry == best then we can't be sure our best
8219                  * is really the best, so we need to keep searching.
8220                  */
8221                 if (best && best->count == entry->count) {
8222                         prev = entry;
8223                         best = NULL;
8224                         continue;
8225                 }
8226
8227                 /* Prev == entry, not good enough, have to keep searching */
8228                 if (!prev->broken && prev->count == entry->count)
8229                         continue;
8230
8231                 if (!best)
8232                         best = (prev->count > entry->count) ? prev : entry;
8233                 else if (best->count < entry->count)
8234                         best = entry;
8235                 prev = entry;
8236         }
8237
8238         return best;
8239 }
8240
8241 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8242                       struct data_backref *dback, struct extent_entry *entry)
8243 {
8244         struct btrfs_trans_handle *trans;
8245         struct btrfs_root *root;
8246         struct btrfs_file_extent_item *fi;
8247         struct extent_buffer *leaf;
8248         struct btrfs_key key;
8249         u64 bytenr, bytes;
8250         int ret, err;
8251
8252         key.objectid = dback->root;
8253         key.type = BTRFS_ROOT_ITEM_KEY;
8254         key.offset = (u64)-1;
8255         root = btrfs_read_fs_root(info, &key);
8256         if (IS_ERR(root)) {
8257                 fprintf(stderr, "Couldn't find root for our ref\n");
8258                 return -EINVAL;
8259         }
8260
8261         /*
8262          * The backref points to the original offset of the extent if it was
8263          * split, so we need to search down to the offset we have and then walk
8264          * forward until we find the backref we're looking for.
8265          */
8266         key.objectid = dback->owner;
8267         key.type = BTRFS_EXTENT_DATA_KEY;
8268         key.offset = dback->offset;
8269         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8270         if (ret < 0) {
8271                 fprintf(stderr, "Error looking up ref %d\n", ret);
8272                 return ret;
8273         }
8274
8275         while (1) {
8276                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8277                         ret = btrfs_next_leaf(root, path);
8278                         if (ret) {
8279                                 fprintf(stderr, "Couldn't find our ref, next\n");
8280                                 return -EINVAL;
8281                         }
8282                 }
8283                 leaf = path->nodes[0];
8284                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8285                 if (key.objectid != dback->owner ||
8286                     key.type != BTRFS_EXTENT_DATA_KEY) {
8287                         fprintf(stderr, "Couldn't find our ref, search\n");
8288                         return -EINVAL;
8289                 }
8290                 fi = btrfs_item_ptr(leaf, path->slots[0],
8291                                     struct btrfs_file_extent_item);
8292                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8293                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8294
8295                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8296                         break;
8297                 path->slots[0]++;
8298         }
8299
8300         btrfs_release_path(path);
8301
8302         trans = btrfs_start_transaction(root, 1);
8303         if (IS_ERR(trans))
8304                 return PTR_ERR(trans);
8305
8306         /*
8307          * Ok we have the key of the file extent we want to fix, now we can cow
8308          * down to the thing and fix it.
8309          */
8310         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8311         if (ret < 0) {
8312                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8313                         key.objectid, key.type, key.offset, ret);
8314                 goto out;
8315         }
8316         if (ret > 0) {
8317                 fprintf(stderr, "Well that's odd, we just found this key "
8318                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8319                         key.offset);
8320                 ret = -EINVAL;
8321                 goto out;
8322         }
8323         leaf = path->nodes[0];
8324         fi = btrfs_item_ptr(leaf, path->slots[0],
8325                             struct btrfs_file_extent_item);
8326
8327         if (btrfs_file_extent_compression(leaf, fi) &&
8328             dback->disk_bytenr != entry->bytenr) {
8329                 fprintf(stderr, "Ref doesn't match the record start and is "
8330                         "compressed, please take a btrfs-image of this file "
8331                         "system and send it to a btrfs developer so they can "
8332                         "complete this functionality for bytenr %Lu\n",
8333                         dback->disk_bytenr);
8334                 ret = -EINVAL;
8335                 goto out;
8336         }
8337
8338         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8339                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8340         } else if (dback->disk_bytenr > entry->bytenr) {
8341                 u64 off_diff, offset;
8342
8343                 off_diff = dback->disk_bytenr - entry->bytenr;
8344                 offset = btrfs_file_extent_offset(leaf, fi);
8345                 if (dback->disk_bytenr + offset +
8346                     btrfs_file_extent_num_bytes(leaf, fi) >
8347                     entry->bytenr + entry->bytes) {
8348                         fprintf(stderr, "Ref is past the entry end, please "
8349                                 "take a btrfs-image of this file system and "
8350                                 "send it to a btrfs developer, ref %Lu\n",
8351                                 dback->disk_bytenr);
8352                         ret = -EINVAL;
8353                         goto out;
8354                 }
8355                 offset += off_diff;
8356                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8357                 btrfs_set_file_extent_offset(leaf, fi, offset);
8358         } else if (dback->disk_bytenr < entry->bytenr) {
8359                 u64 offset;
8360
8361                 offset = btrfs_file_extent_offset(leaf, fi);
8362                 if (dback->disk_bytenr + offset < entry->bytenr) {
8363                         fprintf(stderr, "Ref is before the entry start, please"
8364                                 " take a btrfs-image of this file system and "
8365                                 "send it to a btrfs developer, ref %Lu\n",
8366                                 dback->disk_bytenr);
8367                         ret = -EINVAL;
8368                         goto out;
8369                 }
8370
8371                 offset += dback->disk_bytenr;
8372                 offset -= entry->bytenr;
8373                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8374                 btrfs_set_file_extent_offset(leaf, fi, offset);
8375         }
8376
8377         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8378
8379         /*
8380          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8381          * only do this if we aren't using compression, otherwise it's a
8382          * trickier case.
8383          */
8384         if (!btrfs_file_extent_compression(leaf, fi))
8385                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8386         else
8387                 printf("ram bytes may be wrong?\n");
8388         btrfs_mark_buffer_dirty(leaf);
8389 out:
8390         err = btrfs_commit_transaction(trans, root);
8391         btrfs_release_path(path);
8392         return ret ? ret : err;
8393 }
8394
8395 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8396                            struct extent_record *rec)
8397 {
8398         struct extent_backref *back;
8399         struct data_backref *dback;
8400         struct extent_entry *entry, *best = NULL;
8401         LIST_HEAD(entries);
8402         int nr_entries = 0;
8403         int broken_entries = 0;
8404         int ret = 0;
8405         short mismatch = 0;
8406
8407         /*
8408          * Metadata is easy and the backrefs should always agree on bytenr and
8409          * size, if not we've got bigger issues.
8410          */
8411         if (rec->metadata)
8412                 return 0;
8413
8414         list_for_each_entry(back, &rec->backrefs, list) {
8415                 if (back->full_backref || !back->is_data)
8416                         continue;
8417
8418                 dback = to_data_backref(back);
8419
8420                 /*
8421                  * We only pay attention to backrefs that we found a real
8422                  * backref for.
8423                  */
8424                 if (dback->found_ref == 0)
8425                         continue;
8426
8427                 /*
8428                  * For now we only catch when the bytes don't match, not the
8429                  * bytenr.  We can easily do this at the same time, but I want
8430                  * to have a fs image to test on before we just add repair
8431                  * functionality willy-nilly so we know we won't screw up the
8432                  * repair.
8433                  */
8434
8435                 entry = find_entry(&entries, dback->disk_bytenr,
8436                                    dback->bytes);
8437                 if (!entry) {
8438                         entry = malloc(sizeof(struct extent_entry));
8439                         if (!entry) {
8440                                 ret = -ENOMEM;
8441                                 goto out;
8442                         }
8443                         memset(entry, 0, sizeof(*entry));
8444                         entry->bytenr = dback->disk_bytenr;
8445                         entry->bytes = dback->bytes;
8446                         list_add_tail(&entry->list, &entries);
8447                         nr_entries++;
8448                 }
8449
8450                 /*
8451                  * If we only have on entry we may think the entries agree when
8452                  * in reality they don't so we have to do some extra checking.
8453                  */
8454                 if (dback->disk_bytenr != rec->start ||
8455                     dback->bytes != rec->nr || back->broken)
8456                         mismatch = 1;
8457
8458                 if (back->broken) {
8459                         entry->broken++;
8460                         broken_entries++;
8461                 }
8462
8463                 entry->count++;
8464         }
8465
8466         /* Yay all the backrefs agree, carry on good sir */
8467         if (nr_entries <= 1 && !mismatch)
8468                 goto out;
8469
8470         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8471                 "%Lu\n", rec->start);
8472
8473         /*
8474          * First we want to see if the backrefs can agree amongst themselves who
8475          * is right, so figure out which one of the entries has the highest
8476          * count.
8477          */
8478         best = find_most_right_entry(&entries);
8479
8480         /*
8481          * Ok so we may have an even split between what the backrefs think, so
8482          * this is where we use the extent ref to see what it thinks.
8483          */
8484         if (!best) {
8485                 entry = find_entry(&entries, rec->start, rec->nr);
8486                 if (!entry && (!broken_entries || !rec->found_rec)) {
8487                         fprintf(stderr, "Backrefs don't agree with each other "
8488                                 "and extent record doesn't agree with anybody,"
8489                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8490                                 rec->start, rec->nr);
8491                         ret = -EINVAL;
8492                         goto out;
8493                 } else if (!entry) {
8494                         /*
8495                          * Ok our backrefs were broken, we'll assume this is the
8496                          * correct value and add an entry for this range.
8497                          */
8498                         entry = malloc(sizeof(struct extent_entry));
8499                         if (!entry) {
8500                                 ret = -ENOMEM;
8501                                 goto out;
8502                         }
8503                         memset(entry, 0, sizeof(*entry));
8504                         entry->bytenr = rec->start;
8505                         entry->bytes = rec->nr;
8506                         list_add_tail(&entry->list, &entries);
8507                         nr_entries++;
8508                 }
8509                 entry->count++;
8510                 best = find_most_right_entry(&entries);
8511                 if (!best) {
8512                         fprintf(stderr, "Backrefs and extent record evenly "
8513                                 "split on who is right, this is going to "
8514                                 "require user input to fix bytenr %Lu bytes "
8515                                 "%Lu\n", rec->start, rec->nr);
8516                         ret = -EINVAL;
8517                         goto out;
8518                 }
8519         }
8520
8521         /*
8522          * I don't think this can happen currently as we'll abort() if we catch
8523          * this case higher up, but in case somebody removes that we still can't
8524          * deal with it properly here yet, so just bail out of that's the case.
8525          */
8526         if (best->bytenr != rec->start) {
8527                 fprintf(stderr, "Extent start and backref starts don't match, "
8528                         "please use btrfs-image on this file system and send "
8529                         "it to a btrfs developer so they can make fsck fix "
8530                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8531                         rec->start, rec->nr);
8532                 ret = -EINVAL;
8533                 goto out;
8534         }
8535
8536         /*
8537          * Ok great we all agreed on an extent record, let's go find the real
8538          * references and fix up the ones that don't match.
8539          */
8540         list_for_each_entry(back, &rec->backrefs, list) {
8541                 if (back->full_backref || !back->is_data)
8542                         continue;
8543
8544                 dback = to_data_backref(back);
8545
8546                 /*
8547                  * Still ignoring backrefs that don't have a real ref attached
8548                  * to them.
8549                  */
8550                 if (dback->found_ref == 0)
8551                         continue;
8552
8553                 if (dback->bytes == best->bytes &&
8554                     dback->disk_bytenr == best->bytenr)
8555                         continue;
8556
8557                 ret = repair_ref(info, path, dback, best);
8558                 if (ret)
8559                         goto out;
8560         }
8561
8562         /*
8563          * Ok we messed with the actual refs, which means we need to drop our
8564          * entire cache and go back and rescan.  I know this is a huge pain and
8565          * adds a lot of extra work, but it's the only way to be safe.  Once all
8566          * the backrefs agree we may not need to do anything to the extent
8567          * record itself.
8568          */
8569         ret = -EAGAIN;
8570 out:
8571         while (!list_empty(&entries)) {
8572                 entry = list_entry(entries.next, struct extent_entry, list);
8573                 list_del_init(&entry->list);
8574                 free(entry);
8575         }
8576         return ret;
8577 }
8578
8579 static int process_duplicates(struct btrfs_root *root,
8580                               struct cache_tree *extent_cache,
8581                               struct extent_record *rec)
8582 {
8583         struct extent_record *good, *tmp;
8584         struct cache_extent *cache;
8585         int ret;
8586
8587         /*
8588          * If we found a extent record for this extent then return, or if we
8589          * have more than one duplicate we are likely going to need to delete
8590          * something.
8591          */
8592         if (rec->found_rec || rec->num_duplicates > 1)
8593                 return 0;
8594
8595         /* Shouldn't happen but just in case */
8596         BUG_ON(!rec->num_duplicates);
8597
8598         /*
8599          * So this happens if we end up with a backref that doesn't match the
8600          * actual extent entry.  So either the backref is bad or the extent
8601          * entry is bad.  Either way we want to have the extent_record actually
8602          * reflect what we found in the extent_tree, so we need to take the
8603          * duplicate out and use that as the extent_record since the only way we
8604          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8605          */
8606         remove_cache_extent(extent_cache, &rec->cache);
8607
8608         good = to_extent_record(rec->dups.next);
8609         list_del_init(&good->list);
8610         INIT_LIST_HEAD(&good->backrefs);
8611         INIT_LIST_HEAD(&good->dups);
8612         good->cache.start = good->start;
8613         good->cache.size = good->nr;
8614         good->content_checked = 0;
8615         good->owner_ref_checked = 0;
8616         good->num_duplicates = 0;
8617         good->refs = rec->refs;
8618         list_splice_init(&rec->backrefs, &good->backrefs);
8619         while (1) {
8620                 cache = lookup_cache_extent(extent_cache, good->start,
8621                                             good->nr);
8622                 if (!cache)
8623                         break;
8624                 tmp = container_of(cache, struct extent_record, cache);
8625
8626                 /*
8627                  * If we find another overlapping extent and it's found_rec is
8628                  * set then it's a duplicate and we need to try and delete
8629                  * something.
8630                  */
8631                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8632                         if (list_empty(&good->list))
8633                                 list_add_tail(&good->list,
8634                                               &duplicate_extents);
8635                         good->num_duplicates += tmp->num_duplicates + 1;
8636                         list_splice_init(&tmp->dups, &good->dups);
8637                         list_del_init(&tmp->list);
8638                         list_add_tail(&tmp->list, &good->dups);
8639                         remove_cache_extent(extent_cache, &tmp->cache);
8640                         continue;
8641                 }
8642
8643                 /*
8644                  * Ok we have another non extent item backed extent rec, so lets
8645                  * just add it to this extent and carry on like we did above.
8646                  */
8647                 good->refs += tmp->refs;
8648                 list_splice_init(&tmp->backrefs, &good->backrefs);
8649                 remove_cache_extent(extent_cache, &tmp->cache);
8650                 free(tmp);
8651         }
8652         ret = insert_cache_extent(extent_cache, &good->cache);
8653         BUG_ON(ret);
8654         free(rec);
8655         return good->num_duplicates ? 0 : 1;
8656 }
8657
8658 static int delete_duplicate_records(struct btrfs_root *root,
8659                                     struct extent_record *rec)
8660 {
8661         struct btrfs_trans_handle *trans;
8662         LIST_HEAD(delete_list);
8663         struct btrfs_path path;
8664         struct extent_record *tmp, *good, *n;
8665         int nr_del = 0;
8666         int ret = 0, err;
8667         struct btrfs_key key;
8668
8669         btrfs_init_path(&path);
8670
8671         good = rec;
8672         /* Find the record that covers all of the duplicates. */
8673         list_for_each_entry(tmp, &rec->dups, list) {
8674                 if (good->start < tmp->start)
8675                         continue;
8676                 if (good->nr > tmp->nr)
8677                         continue;
8678
8679                 if (tmp->start + tmp->nr < good->start + good->nr) {
8680                         fprintf(stderr, "Ok we have overlapping extents that "
8681                                 "aren't completely covered by each other, this "
8682                                 "is going to require more careful thought.  "
8683                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8684                                 tmp->start, tmp->nr, good->start, good->nr);
8685                         abort();
8686                 }
8687                 good = tmp;
8688         }
8689
8690         if (good != rec)
8691                 list_add_tail(&rec->list, &delete_list);
8692
8693         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8694                 if (tmp == good)
8695                         continue;
8696                 list_move_tail(&tmp->list, &delete_list);
8697         }
8698
8699         root = root->fs_info->extent_root;
8700         trans = btrfs_start_transaction(root, 1);
8701         if (IS_ERR(trans)) {
8702                 ret = PTR_ERR(trans);
8703                 goto out;
8704         }
8705
8706         list_for_each_entry(tmp, &delete_list, list) {
8707                 if (tmp->found_rec == 0)
8708                         continue;
8709                 key.objectid = tmp->start;
8710                 key.type = BTRFS_EXTENT_ITEM_KEY;
8711                 key.offset = tmp->nr;
8712
8713                 /* Shouldn't happen but just in case */
8714                 if (tmp->metadata) {
8715                         fprintf(stderr, "Well this shouldn't happen, extent "
8716                                 "record overlaps but is metadata? "
8717                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8718                         abort();
8719                 }
8720
8721                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8722                 if (ret) {
8723                         if (ret > 0)
8724                                 ret = -EINVAL;
8725                         break;
8726                 }
8727                 ret = btrfs_del_item(trans, root, &path);
8728                 if (ret)
8729                         break;
8730                 btrfs_release_path(&path);
8731                 nr_del++;
8732         }
8733         err = btrfs_commit_transaction(trans, root);
8734         if (err && !ret)
8735                 ret = err;
8736 out:
8737         while (!list_empty(&delete_list)) {
8738                 tmp = to_extent_record(delete_list.next);
8739                 list_del_init(&tmp->list);
8740                 if (tmp == rec)
8741                         continue;
8742                 free(tmp);
8743         }
8744
8745         while (!list_empty(&rec->dups)) {
8746                 tmp = to_extent_record(rec->dups.next);
8747                 list_del_init(&tmp->list);
8748                 free(tmp);
8749         }
8750
8751         btrfs_release_path(&path);
8752
8753         if (!ret && !nr_del)
8754                 rec->num_duplicates = 0;
8755
8756         return ret ? ret : nr_del;
8757 }
8758
8759 static int find_possible_backrefs(struct btrfs_fs_info *info,
8760                                   struct btrfs_path *path,
8761                                   struct cache_tree *extent_cache,
8762                                   struct extent_record *rec)
8763 {
8764         struct btrfs_root *root;
8765         struct extent_backref *back;
8766         struct data_backref *dback;
8767         struct cache_extent *cache;
8768         struct btrfs_file_extent_item *fi;
8769         struct btrfs_key key;
8770         u64 bytenr, bytes;
8771         int ret;
8772
8773         list_for_each_entry(back, &rec->backrefs, list) {
8774                 /* Don't care about full backrefs (poor unloved backrefs) */
8775                 if (back->full_backref || !back->is_data)
8776                         continue;
8777
8778                 dback = to_data_backref(back);
8779
8780                 /* We found this one, we don't need to do a lookup */
8781                 if (dback->found_ref)
8782                         continue;
8783
8784                 key.objectid = dback->root;
8785                 key.type = BTRFS_ROOT_ITEM_KEY;
8786                 key.offset = (u64)-1;
8787
8788                 root = btrfs_read_fs_root(info, &key);
8789
8790                 /* No root, definitely a bad ref, skip */
8791                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8792                         continue;
8793                 /* Other err, exit */
8794                 if (IS_ERR(root))
8795                         return PTR_ERR(root);
8796
8797                 key.objectid = dback->owner;
8798                 key.type = BTRFS_EXTENT_DATA_KEY;
8799                 key.offset = dback->offset;
8800                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8801                 if (ret) {
8802                         btrfs_release_path(path);
8803                         if (ret < 0)
8804                                 return ret;
8805                         /* Didn't find it, we can carry on */
8806                         ret = 0;
8807                         continue;
8808                 }
8809
8810                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8811                                     struct btrfs_file_extent_item);
8812                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8813                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8814                 btrfs_release_path(path);
8815                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8816                 if (cache) {
8817                         struct extent_record *tmp;
8818                         tmp = container_of(cache, struct extent_record, cache);
8819
8820                         /*
8821                          * If we found an extent record for the bytenr for this
8822                          * particular backref then we can't add it to our
8823                          * current extent record.  We only want to add backrefs
8824                          * that don't have a corresponding extent item in the
8825                          * extent tree since they likely belong to this record
8826                          * and we need to fix it if it doesn't match bytenrs.
8827                          */
8828                         if  (tmp->found_rec)
8829                                 continue;
8830                 }
8831
8832                 dback->found_ref += 1;
8833                 dback->disk_bytenr = bytenr;
8834                 dback->bytes = bytes;
8835
8836                 /*
8837                  * Set this so the verify backref code knows not to trust the
8838                  * values in this backref.
8839                  */
8840                 back->broken = 1;
8841         }
8842
8843         return 0;
8844 }
8845
8846 /*
8847  * Record orphan data ref into corresponding root.
8848  *
8849  * Return 0 if the extent item contains data ref and recorded.
8850  * Return 1 if the extent item contains no useful data ref
8851  *   On that case, it may contains only shared_dataref or metadata backref
8852  *   or the file extent exists(this should be handled by the extent bytenr
8853  *   recovery routine)
8854  * Return <0 if something goes wrong.
8855  */
8856 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8857                                       struct extent_record *rec)
8858 {
8859         struct btrfs_key key;
8860         struct btrfs_root *dest_root;
8861         struct extent_backref *back;
8862         struct data_backref *dback;
8863         struct orphan_data_extent *orphan;
8864         struct btrfs_path path;
8865         int recorded_data_ref = 0;
8866         int ret = 0;
8867
8868         if (rec->metadata)
8869                 return 1;
8870         btrfs_init_path(&path);
8871         list_for_each_entry(back, &rec->backrefs, list) {
8872                 if (back->full_backref || !back->is_data ||
8873                     !back->found_extent_tree)
8874                         continue;
8875                 dback = to_data_backref(back);
8876                 if (dback->found_ref)
8877                         continue;
8878                 key.objectid = dback->root;
8879                 key.type = BTRFS_ROOT_ITEM_KEY;
8880                 key.offset = (u64)-1;
8881
8882                 dest_root = btrfs_read_fs_root(fs_info, &key);
8883
8884                 /* For non-exist root we just skip it */
8885                 if (IS_ERR(dest_root) || !dest_root)
8886                         continue;
8887
8888                 key.objectid = dback->owner;
8889                 key.type = BTRFS_EXTENT_DATA_KEY;
8890                 key.offset = dback->offset;
8891
8892                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8893                 btrfs_release_path(&path);
8894                 /*
8895                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8896                  * we need to record it for inode/file extent rebuild.
8897                  * For ret > 0, we record it only for file extent rebuild.
8898                  * For ret == 0, the file extent exists but only bytenr
8899                  * mismatch, let the original bytenr fix routine to handle,
8900                  * don't record it.
8901                  */
8902                 if (ret == 0)
8903                         continue;
8904                 ret = 0;
8905                 orphan = malloc(sizeof(*orphan));
8906                 if (!orphan) {
8907                         ret = -ENOMEM;
8908                         goto out;
8909                 }
8910                 INIT_LIST_HEAD(&orphan->list);
8911                 orphan->root = dback->root;
8912                 orphan->objectid = dback->owner;
8913                 orphan->offset = dback->offset;
8914                 orphan->disk_bytenr = rec->cache.start;
8915                 orphan->disk_len = rec->cache.size;
8916                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8917                 recorded_data_ref = 1;
8918         }
8919 out:
8920         btrfs_release_path(&path);
8921         if (!ret)
8922                 return !recorded_data_ref;
8923         else
8924                 return ret;
8925 }
8926
8927 /*
8928  * when an incorrect extent item is found, this will delete
8929  * all of the existing entries for it and recreate them
8930  * based on what the tree scan found.
8931  */
8932 static int fixup_extent_refs(struct btrfs_fs_info *info,
8933                              struct cache_tree *extent_cache,
8934                              struct extent_record *rec)
8935 {
8936         struct btrfs_trans_handle *trans = NULL;
8937         int ret;
8938         struct btrfs_path path;
8939         struct list_head *cur = rec->backrefs.next;
8940         struct cache_extent *cache;
8941         struct extent_backref *back;
8942         int allocated = 0;
8943         u64 flags = 0;
8944
8945         if (rec->flag_block_full_backref)
8946                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8947
8948         btrfs_init_path(&path);
8949         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8950                 /*
8951                  * Sometimes the backrefs themselves are so broken they don't
8952                  * get attached to any meaningful rec, so first go back and
8953                  * check any of our backrefs that we couldn't find and throw
8954                  * them into the list if we find the backref so that
8955                  * verify_backrefs can figure out what to do.
8956                  */
8957                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8958                 if (ret < 0)
8959                         goto out;
8960         }
8961
8962         /* step one, make sure all of the backrefs agree */
8963         ret = verify_backrefs(info, &path, rec);
8964         if (ret < 0)
8965                 goto out;
8966
8967         trans = btrfs_start_transaction(info->extent_root, 1);
8968         if (IS_ERR(trans)) {
8969                 ret = PTR_ERR(trans);
8970                 goto out;
8971         }
8972
8973         /* step two, delete all the existing records */
8974         ret = delete_extent_records(trans, info->extent_root, &path,
8975                                     rec->start);
8976
8977         if (ret < 0)
8978                 goto out;
8979
8980         /* was this block corrupt?  If so, don't add references to it */
8981         cache = lookup_cache_extent(info->corrupt_blocks,
8982                                     rec->start, rec->max_size);
8983         if (cache) {
8984                 ret = 0;
8985                 goto out;
8986         }
8987
8988         /* step three, recreate all the refs we did find */
8989         while(cur != &rec->backrefs) {
8990                 back = to_extent_backref(cur);
8991                 cur = cur->next;
8992
8993                 /*
8994                  * if we didn't find any references, don't create a
8995                  * new extent record
8996                  */
8997                 if (!back->found_ref)
8998                         continue;
8999
9000                 rec->bad_full_backref = 0;
9001                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9002                 allocated = 1;
9003
9004                 if (ret)
9005                         goto out;
9006         }
9007 out:
9008         if (trans) {
9009                 int err = btrfs_commit_transaction(trans, info->extent_root);
9010                 if (!ret)
9011                         ret = err;
9012         }
9013
9014         if (!ret)
9015                 fprintf(stderr, "Repaired extent references for %llu\n",
9016                                 (unsigned long long)rec->start);
9017
9018         btrfs_release_path(&path);
9019         return ret;
9020 }
9021
9022 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9023                               struct extent_record *rec)
9024 {
9025         struct btrfs_trans_handle *trans;
9026         struct btrfs_root *root = fs_info->extent_root;
9027         struct btrfs_path path;
9028         struct btrfs_extent_item *ei;
9029         struct btrfs_key key;
9030         u64 flags;
9031         int ret = 0;
9032
9033         key.objectid = rec->start;
9034         if (rec->metadata) {
9035                 key.type = BTRFS_METADATA_ITEM_KEY;
9036                 key.offset = rec->info_level;
9037         } else {
9038                 key.type = BTRFS_EXTENT_ITEM_KEY;
9039                 key.offset = rec->max_size;
9040         }
9041
9042         trans = btrfs_start_transaction(root, 0);
9043         if (IS_ERR(trans))
9044                 return PTR_ERR(trans);
9045
9046         btrfs_init_path(&path);
9047         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9048         if (ret < 0) {
9049                 btrfs_release_path(&path);
9050                 btrfs_commit_transaction(trans, root);
9051                 return ret;
9052         } else if (ret) {
9053                 fprintf(stderr, "Didn't find extent for %llu\n",
9054                         (unsigned long long)rec->start);
9055                 btrfs_release_path(&path);
9056                 btrfs_commit_transaction(trans, root);
9057                 return -ENOENT;
9058         }
9059
9060         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9061                             struct btrfs_extent_item);
9062         flags = btrfs_extent_flags(path.nodes[0], ei);
9063         if (rec->flag_block_full_backref) {
9064                 fprintf(stderr, "setting full backref on %llu\n",
9065                         (unsigned long long)key.objectid);
9066                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9067         } else {
9068                 fprintf(stderr, "clearing full backref on %llu\n",
9069                         (unsigned long long)key.objectid);
9070                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9071         }
9072         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9073         btrfs_mark_buffer_dirty(path.nodes[0]);
9074         btrfs_release_path(&path);
9075         ret = btrfs_commit_transaction(trans, root);
9076         if (!ret)
9077                 fprintf(stderr, "Repaired extent flags for %llu\n",
9078                                 (unsigned long long)rec->start);
9079
9080         return ret;
9081 }
9082
9083 /* right now we only prune from the extent allocation tree */
9084 static int prune_one_block(struct btrfs_trans_handle *trans,
9085                            struct btrfs_fs_info *info,
9086                            struct btrfs_corrupt_block *corrupt)
9087 {
9088         int ret;
9089         struct btrfs_path path;
9090         struct extent_buffer *eb;
9091         u64 found;
9092         int slot;
9093         int nritems;
9094         int level = corrupt->level + 1;
9095
9096         btrfs_init_path(&path);
9097 again:
9098         /* we want to stop at the parent to our busted block */
9099         path.lowest_level = level;
9100
9101         ret = btrfs_search_slot(trans, info->extent_root,
9102                                 &corrupt->key, &path, -1, 1);
9103
9104         if (ret < 0)
9105                 goto out;
9106
9107         eb = path.nodes[level];
9108         if (!eb) {
9109                 ret = -ENOENT;
9110                 goto out;
9111         }
9112
9113         /*
9114          * hopefully the search gave us the block we want to prune,
9115          * lets try that first
9116          */
9117         slot = path.slots[level];
9118         found =  btrfs_node_blockptr(eb, slot);
9119         if (found == corrupt->cache.start)
9120                 goto del_ptr;
9121
9122         nritems = btrfs_header_nritems(eb);
9123
9124         /* the search failed, lets scan this node and hope we find it */
9125         for (slot = 0; slot < nritems; slot++) {
9126                 found =  btrfs_node_blockptr(eb, slot);
9127                 if (found == corrupt->cache.start)
9128                         goto del_ptr;
9129         }
9130         /*
9131          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9132          * to this block
9133          */
9134         if (eb == info->extent_root->node) {
9135                 ret = -ENOENT;
9136                 goto out;
9137         } else {
9138                 level++;
9139                 btrfs_release_path(&path);
9140                 goto again;
9141         }
9142
9143 del_ptr:
9144         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9145         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9146
9147 out:
9148         btrfs_release_path(&path);
9149         return ret;
9150 }
9151
9152 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9153 {
9154         struct btrfs_trans_handle *trans = NULL;
9155         struct cache_extent *cache;
9156         struct btrfs_corrupt_block *corrupt;
9157
9158         while (1) {
9159                 cache = search_cache_extent(info->corrupt_blocks, 0);
9160                 if (!cache)
9161                         break;
9162                 if (!trans) {
9163                         trans = btrfs_start_transaction(info->extent_root, 1);
9164                         if (IS_ERR(trans))
9165                                 return PTR_ERR(trans);
9166                 }
9167                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9168                 prune_one_block(trans, info, corrupt);
9169                 remove_cache_extent(info->corrupt_blocks, cache);
9170         }
9171         if (trans)
9172                 return btrfs_commit_transaction(trans, info->extent_root);
9173         return 0;
9174 }
9175
9176 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9177 {
9178         struct btrfs_block_group_cache *cache;
9179         u64 start, end;
9180         int ret;
9181
9182         while (1) {
9183                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9184                                             &start, &end, EXTENT_DIRTY);
9185                 if (ret)
9186                         break;
9187                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9188         }
9189
9190         start = 0;
9191         while (1) {
9192                 cache = btrfs_lookup_first_block_group(fs_info, start);
9193                 if (!cache)
9194                         break;
9195                 if (cache->cached)
9196                         cache->cached = 0;
9197                 start = cache->key.objectid + cache->key.offset;
9198         }
9199 }
9200
9201 static int check_extent_refs(struct btrfs_root *root,
9202                              struct cache_tree *extent_cache)
9203 {
9204         struct extent_record *rec;
9205         struct cache_extent *cache;
9206         int ret = 0;
9207         int had_dups = 0;
9208
9209         if (repair) {
9210                 /*
9211                  * if we're doing a repair, we have to make sure
9212                  * we don't allocate from the problem extents.
9213                  * In the worst case, this will be all the
9214                  * extents in the FS
9215                  */
9216                 cache = search_cache_extent(extent_cache, 0);
9217                 while(cache) {
9218                         rec = container_of(cache, struct extent_record, cache);
9219                         set_extent_dirty(root->fs_info->excluded_extents,
9220                                          rec->start,
9221                                          rec->start + rec->max_size - 1);
9222                         cache = next_cache_extent(cache);
9223                 }
9224
9225                 /* pin down all the corrupted blocks too */
9226                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9227                 while(cache) {
9228                         set_extent_dirty(root->fs_info->excluded_extents,
9229                                          cache->start,
9230                                          cache->start + cache->size - 1);
9231                         cache = next_cache_extent(cache);
9232                 }
9233                 prune_corrupt_blocks(root->fs_info);
9234                 reset_cached_block_groups(root->fs_info);
9235         }
9236
9237         reset_cached_block_groups(root->fs_info);
9238
9239         /*
9240          * We need to delete any duplicate entries we find first otherwise we
9241          * could mess up the extent tree when we have backrefs that actually
9242          * belong to a different extent item and not the weird duplicate one.
9243          */
9244         while (repair && !list_empty(&duplicate_extents)) {
9245                 rec = to_extent_record(duplicate_extents.next);
9246                 list_del_init(&rec->list);
9247
9248                 /* Sometimes we can find a backref before we find an actual
9249                  * extent, so we need to process it a little bit to see if there
9250                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9251                  * if this is a backref screwup.  If we need to delete stuff
9252                  * process_duplicates() will return 0, otherwise it will return
9253                  * 1 and we
9254                  */
9255                 if (process_duplicates(root, extent_cache, rec))
9256                         continue;
9257                 ret = delete_duplicate_records(root, rec);
9258                 if (ret < 0)
9259                         return ret;
9260                 /*
9261                  * delete_duplicate_records will return the number of entries
9262                  * deleted, so if it's greater than 0 then we know we actually
9263                  * did something and we need to remove.
9264                  */
9265                 if (ret)
9266                         had_dups = 1;
9267         }
9268
9269         if (had_dups)
9270                 return -EAGAIN;
9271
9272         while(1) {
9273                 int cur_err = 0;
9274                 int fix = 0;
9275
9276                 cache = search_cache_extent(extent_cache, 0);
9277                 if (!cache)
9278                         break;
9279                 rec = container_of(cache, struct extent_record, cache);
9280                 if (rec->num_duplicates) {
9281                         fprintf(stderr, "extent item %llu has multiple extent "
9282                                 "items\n", (unsigned long long)rec->start);
9283                         cur_err = 1;
9284                 }
9285
9286                 if (rec->refs != rec->extent_item_refs) {
9287                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9288                                 (unsigned long long)rec->start,
9289                                 (unsigned long long)rec->nr);
9290                         fprintf(stderr, "extent item %llu, found %llu\n",
9291                                 (unsigned long long)rec->extent_item_refs,
9292                                 (unsigned long long)rec->refs);
9293                         ret = record_orphan_data_extents(root->fs_info, rec);
9294                         if (ret < 0)
9295                                 goto repair_abort;
9296                         fix = ret;
9297                         cur_err = 1;
9298                 }
9299                 if (all_backpointers_checked(rec, 1)) {
9300                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9301                                 (unsigned long long)rec->start,
9302                                 (unsigned long long)rec->nr);
9303                         fix = 1;
9304                         cur_err = 1;
9305                 }
9306                 if (!rec->owner_ref_checked) {
9307                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9308                                 (unsigned long long)rec->start,
9309                                 (unsigned long long)rec->nr);
9310                         fix = 1;
9311                         cur_err = 1;
9312                 }
9313
9314                 if (repair && fix) {
9315                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9316                         if (ret)
9317                                 goto repair_abort;
9318                 }
9319
9320
9321                 if (rec->bad_full_backref) {
9322                         fprintf(stderr, "bad full backref, on [%llu]\n",
9323                                 (unsigned long long)rec->start);
9324                         if (repair) {
9325                                 ret = fixup_extent_flags(root->fs_info, rec);
9326                                 if (ret)
9327                                         goto repair_abort;
9328                                 fix = 1;
9329                         }
9330                         cur_err = 1;
9331                 }
9332                 /*
9333                  * Although it's not a extent ref's problem, we reuse this
9334                  * routine for error reporting.
9335                  * No repair function yet.
9336                  */
9337                 if (rec->crossing_stripes) {
9338                         fprintf(stderr,
9339                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9340                                 rec->start, rec->start + rec->max_size);
9341                         cur_err = 1;
9342                 }
9343
9344                 if (rec->wrong_chunk_type) {
9345                         fprintf(stderr,
9346                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9347                                 rec->start, rec->start + rec->max_size);
9348                         cur_err = 1;
9349                 }
9350
9351                 remove_cache_extent(extent_cache, cache);
9352                 free_all_extent_backrefs(rec);
9353                 if (!init_extent_tree && repair && (!cur_err || fix))
9354                         clear_extent_dirty(root->fs_info->excluded_extents,
9355                                            rec->start,
9356                                            rec->start + rec->max_size - 1);
9357                 free(rec);
9358         }
9359 repair_abort:
9360         if (repair) {
9361                 if (ret && ret != -EAGAIN) {
9362                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9363                         exit(1);
9364                 } else if (!ret) {
9365                         struct btrfs_trans_handle *trans;
9366
9367                         root = root->fs_info->extent_root;
9368                         trans = btrfs_start_transaction(root, 1);
9369                         if (IS_ERR(trans)) {
9370                                 ret = PTR_ERR(trans);
9371                                 goto repair_abort;
9372                         }
9373
9374                         btrfs_fix_block_accounting(trans, root);
9375                         ret = btrfs_commit_transaction(trans, root);
9376                         if (ret)
9377                                 goto repair_abort;
9378                 }
9379                 return ret;
9380         }
9381         return 0;
9382 }
9383
9384 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9385 {
9386         u64 stripe_size;
9387
9388         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9389                 stripe_size = length;
9390                 stripe_size /= num_stripes;
9391         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9392                 stripe_size = length * 2;
9393                 stripe_size /= num_stripes;
9394         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9395                 stripe_size = length;
9396                 stripe_size /= (num_stripes - 1);
9397         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9398                 stripe_size = length;
9399                 stripe_size /= (num_stripes - 2);
9400         } else {
9401                 stripe_size = length;
9402         }
9403         return stripe_size;
9404 }
9405
9406 /*
9407  * Check the chunk with its block group/dev list ref:
9408  * Return 0 if all refs seems valid.
9409  * Return 1 if part of refs seems valid, need later check for rebuild ref
9410  * like missing block group and needs to search extent tree to rebuild them.
9411  * Return -1 if essential refs are missing and unable to rebuild.
9412  */
9413 static int check_chunk_refs(struct chunk_record *chunk_rec,
9414                             struct block_group_tree *block_group_cache,
9415                             struct device_extent_tree *dev_extent_cache,
9416                             int silent)
9417 {
9418         struct cache_extent *block_group_item;
9419         struct block_group_record *block_group_rec;
9420         struct cache_extent *dev_extent_item;
9421         struct device_extent_record *dev_extent_rec;
9422         u64 devid;
9423         u64 offset;
9424         u64 length;
9425         int metadump_v2 = 0;
9426         int i;
9427         int ret = 0;
9428
9429         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9430                                                chunk_rec->offset,
9431                                                chunk_rec->length);
9432         if (block_group_item) {
9433                 block_group_rec = container_of(block_group_item,
9434                                                struct block_group_record,
9435                                                cache);
9436                 if (chunk_rec->length != block_group_rec->offset ||
9437                     chunk_rec->offset != block_group_rec->objectid ||
9438                     (!metadump_v2 &&
9439                      chunk_rec->type_flags != block_group_rec->flags)) {
9440                         if (!silent)
9441                                 fprintf(stderr,
9442                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9443                                         chunk_rec->objectid,
9444                                         chunk_rec->type,
9445                                         chunk_rec->offset,
9446                                         chunk_rec->length,
9447                                         chunk_rec->offset,
9448                                         chunk_rec->type_flags,
9449                                         block_group_rec->objectid,
9450                                         block_group_rec->type,
9451                                         block_group_rec->offset,
9452                                         block_group_rec->offset,
9453                                         block_group_rec->objectid,
9454                                         block_group_rec->flags);
9455                         ret = -1;
9456                 } else {
9457                         list_del_init(&block_group_rec->list);
9458                         chunk_rec->bg_rec = block_group_rec;
9459                 }
9460         } else {
9461                 if (!silent)
9462                         fprintf(stderr,
9463                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9464                                 chunk_rec->objectid,
9465                                 chunk_rec->type,
9466                                 chunk_rec->offset,
9467                                 chunk_rec->length,
9468                                 chunk_rec->offset,
9469                                 chunk_rec->type_flags);
9470                 ret = 1;
9471         }
9472
9473         if (metadump_v2)
9474                 return ret;
9475
9476         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9477                                     chunk_rec->num_stripes);
9478         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9479                 devid = chunk_rec->stripes[i].devid;
9480                 offset = chunk_rec->stripes[i].offset;
9481                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9482                                                        devid, offset, length);
9483                 if (dev_extent_item) {
9484                         dev_extent_rec = container_of(dev_extent_item,
9485                                                 struct device_extent_record,
9486                                                 cache);
9487                         if (dev_extent_rec->objectid != devid ||
9488                             dev_extent_rec->offset != offset ||
9489                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9490                             dev_extent_rec->length != length) {
9491                                 if (!silent)
9492                                         fprintf(stderr,
9493                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9494                                                 chunk_rec->objectid,
9495                                                 chunk_rec->type,
9496                                                 chunk_rec->offset,
9497                                                 chunk_rec->stripes[i].devid,
9498                                                 chunk_rec->stripes[i].offset,
9499                                                 dev_extent_rec->objectid,
9500                                                 dev_extent_rec->offset,
9501                                                 dev_extent_rec->length);
9502                                 ret = -1;
9503                         } else {
9504                                 list_move(&dev_extent_rec->chunk_list,
9505                                           &chunk_rec->dextents);
9506                         }
9507                 } else {
9508                         if (!silent)
9509                                 fprintf(stderr,
9510                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9511                                         chunk_rec->objectid,
9512                                         chunk_rec->type,
9513                                         chunk_rec->offset,
9514                                         chunk_rec->stripes[i].devid,
9515                                         chunk_rec->stripes[i].offset);
9516                         ret = -1;
9517                 }
9518         }
9519         return ret;
9520 }
9521
9522 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9523 int check_chunks(struct cache_tree *chunk_cache,
9524                  struct block_group_tree *block_group_cache,
9525                  struct device_extent_tree *dev_extent_cache,
9526                  struct list_head *good, struct list_head *bad,
9527                  struct list_head *rebuild, int silent)
9528 {
9529         struct cache_extent *chunk_item;
9530         struct chunk_record *chunk_rec;
9531         struct block_group_record *bg_rec;
9532         struct device_extent_record *dext_rec;
9533         int err;
9534         int ret = 0;
9535
9536         chunk_item = first_cache_extent(chunk_cache);
9537         while (chunk_item) {
9538                 chunk_rec = container_of(chunk_item, struct chunk_record,
9539                                          cache);
9540                 err = check_chunk_refs(chunk_rec, block_group_cache,
9541                                        dev_extent_cache, silent);
9542                 if (err < 0)
9543                         ret = err;
9544                 if (err == 0 && good)
9545                         list_add_tail(&chunk_rec->list, good);
9546                 if (err > 0 && rebuild)
9547                         list_add_tail(&chunk_rec->list, rebuild);
9548                 if (err < 0 && bad)
9549                         list_add_tail(&chunk_rec->list, bad);
9550                 chunk_item = next_cache_extent(chunk_item);
9551         }
9552
9553         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9554                 if (!silent)
9555                         fprintf(stderr,
9556                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9557                                 bg_rec->objectid,
9558                                 bg_rec->offset,
9559                                 bg_rec->flags);
9560                 if (!ret)
9561                         ret = 1;
9562         }
9563
9564         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9565                             chunk_list) {
9566                 if (!silent)
9567                         fprintf(stderr,
9568                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9569                                 dext_rec->objectid,
9570                                 dext_rec->offset,
9571                                 dext_rec->length);
9572                 if (!ret)
9573                         ret = 1;
9574         }
9575         return ret;
9576 }
9577
9578
9579 static int check_device_used(struct device_record *dev_rec,
9580                              struct device_extent_tree *dext_cache)
9581 {
9582         struct cache_extent *cache;
9583         struct device_extent_record *dev_extent_rec;
9584         u64 total_byte = 0;
9585
9586         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9587         while (cache) {
9588                 dev_extent_rec = container_of(cache,
9589                                               struct device_extent_record,
9590                                               cache);
9591                 if (dev_extent_rec->objectid != dev_rec->devid)
9592                         break;
9593
9594                 list_del_init(&dev_extent_rec->device_list);
9595                 total_byte += dev_extent_rec->length;
9596                 cache = next_cache_extent(cache);
9597         }
9598
9599         if (total_byte != dev_rec->byte_used) {
9600                 fprintf(stderr,
9601                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9602                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9603                         dev_rec->type, dev_rec->offset);
9604                 return -1;
9605         } else {
9606                 return 0;
9607         }
9608 }
9609
9610 /* check btrfs_dev_item -> btrfs_dev_extent */
9611 static int check_devices(struct rb_root *dev_cache,
9612                          struct device_extent_tree *dev_extent_cache)
9613 {
9614         struct rb_node *dev_node;
9615         struct device_record *dev_rec;
9616         struct device_extent_record *dext_rec;
9617         int err;
9618         int ret = 0;
9619
9620         dev_node = rb_first(dev_cache);
9621         while (dev_node) {
9622                 dev_rec = container_of(dev_node, struct device_record, node);
9623                 err = check_device_used(dev_rec, dev_extent_cache);
9624                 if (err)
9625                         ret = err;
9626
9627                 dev_node = rb_next(dev_node);
9628         }
9629         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9630                             device_list) {
9631                 fprintf(stderr,
9632                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9633                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9634                 if (!ret)
9635                         ret = 1;
9636         }
9637         return ret;
9638 }
9639
9640 static int add_root_item_to_list(struct list_head *head,
9641                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9642                                   u8 level, u8 drop_level,
9643                                   int level_size, struct btrfs_key *drop_key)
9644 {
9645
9646         struct root_item_record *ri_rec;
9647         ri_rec = malloc(sizeof(*ri_rec));
9648         if (!ri_rec)
9649                 return -ENOMEM;
9650         ri_rec->bytenr = bytenr;
9651         ri_rec->objectid = objectid;
9652         ri_rec->level = level;
9653         ri_rec->level_size = level_size;
9654         ri_rec->drop_level = drop_level;
9655         ri_rec->last_snapshot = last_snapshot;
9656         if (drop_key)
9657                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9658         list_add_tail(&ri_rec->list, head);
9659
9660         return 0;
9661 }
9662
9663 static void free_root_item_list(struct list_head *list)
9664 {
9665         struct root_item_record *ri_rec;
9666
9667         while (!list_empty(list)) {
9668                 ri_rec = list_first_entry(list, struct root_item_record,
9669                                           list);
9670                 list_del_init(&ri_rec->list);
9671                 free(ri_rec);
9672         }
9673 }
9674
9675 static int deal_root_from_list(struct list_head *list,
9676                                struct btrfs_root *root,
9677                                struct block_info *bits,
9678                                int bits_nr,
9679                                struct cache_tree *pending,
9680                                struct cache_tree *seen,
9681                                struct cache_tree *reada,
9682                                struct cache_tree *nodes,
9683                                struct cache_tree *extent_cache,
9684                                struct cache_tree *chunk_cache,
9685                                struct rb_root *dev_cache,
9686                                struct block_group_tree *block_group_cache,
9687                                struct device_extent_tree *dev_extent_cache)
9688 {
9689         int ret = 0;
9690         u64 last;
9691
9692         while (!list_empty(list)) {
9693                 struct root_item_record *rec;
9694                 struct extent_buffer *buf;
9695                 rec = list_entry(list->next,
9696                                  struct root_item_record, list);
9697                 last = 0;
9698                 buf = read_tree_block(root->fs_info->tree_root,
9699                                       rec->bytenr, rec->level_size, 0);
9700                 if (!extent_buffer_uptodate(buf)) {
9701                         free_extent_buffer(buf);
9702                         ret = -EIO;
9703                         break;
9704                 }
9705                 ret = add_root_to_pending(buf, extent_cache, pending,
9706                                     seen, nodes, rec->objectid);
9707                 if (ret < 0)
9708                         break;
9709                 /*
9710                  * To rebuild extent tree, we need deal with snapshot
9711                  * one by one, otherwise we deal with node firstly which
9712                  * can maximize readahead.
9713                  */
9714                 while (1) {
9715                         ret = run_next_block(root, bits, bits_nr, &last,
9716                                              pending, seen, reada, nodes,
9717                                              extent_cache, chunk_cache,
9718                                              dev_cache, block_group_cache,
9719                                              dev_extent_cache, rec);
9720                         if (ret != 0)
9721                                 break;
9722                 }
9723                 free_extent_buffer(buf);
9724                 list_del(&rec->list);
9725                 free(rec);
9726                 if (ret < 0)
9727                         break;
9728         }
9729         while (ret >= 0) {
9730                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9731                                      reada, nodes, extent_cache, chunk_cache,
9732                                      dev_cache, block_group_cache,
9733                                      dev_extent_cache, NULL);
9734                 if (ret != 0) {
9735                         if (ret > 0)
9736                                 ret = 0;
9737                         break;
9738                 }
9739         }
9740         return ret;
9741 }
9742
9743 static int check_chunks_and_extents(struct btrfs_root *root)
9744 {
9745         struct rb_root dev_cache;
9746         struct cache_tree chunk_cache;
9747         struct block_group_tree block_group_cache;
9748         struct device_extent_tree dev_extent_cache;
9749         struct cache_tree extent_cache;
9750         struct cache_tree seen;
9751         struct cache_tree pending;
9752         struct cache_tree reada;
9753         struct cache_tree nodes;
9754         struct extent_io_tree excluded_extents;
9755         struct cache_tree corrupt_blocks;
9756         struct btrfs_path path;
9757         struct btrfs_key key;
9758         struct btrfs_key found_key;
9759         int ret, err = 0;
9760         struct block_info *bits;
9761         int bits_nr;
9762         struct extent_buffer *leaf;
9763         int slot;
9764         struct btrfs_root_item ri;
9765         struct list_head dropping_trees;
9766         struct list_head normal_trees;
9767         struct btrfs_root *root1;
9768         u64 objectid;
9769         u32 level_size;
9770         u8 level;
9771
9772         dev_cache = RB_ROOT;
9773         cache_tree_init(&chunk_cache);
9774         block_group_tree_init(&block_group_cache);
9775         device_extent_tree_init(&dev_extent_cache);
9776
9777         cache_tree_init(&extent_cache);
9778         cache_tree_init(&seen);
9779         cache_tree_init(&pending);
9780         cache_tree_init(&nodes);
9781         cache_tree_init(&reada);
9782         cache_tree_init(&corrupt_blocks);
9783         extent_io_tree_init(&excluded_extents);
9784         INIT_LIST_HEAD(&dropping_trees);
9785         INIT_LIST_HEAD(&normal_trees);
9786
9787         if (repair) {
9788                 root->fs_info->excluded_extents = &excluded_extents;
9789                 root->fs_info->fsck_extent_cache = &extent_cache;
9790                 root->fs_info->free_extent_hook = free_extent_hook;
9791                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9792         }
9793
9794         bits_nr = 1024;
9795         bits = malloc(bits_nr * sizeof(struct block_info));
9796         if (!bits) {
9797                 perror("malloc");
9798                 exit(1);
9799         }
9800
9801         if (ctx.progress_enabled) {
9802                 ctx.tp = TASK_EXTENTS;
9803                 task_start(ctx.info);
9804         }
9805
9806 again:
9807         root1 = root->fs_info->tree_root;
9808         level = btrfs_header_level(root1->node);
9809         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9810                                     root1->node->start, 0, level, 0,
9811                                     root1->nodesize, NULL);
9812         if (ret < 0)
9813                 goto out;
9814         root1 = root->fs_info->chunk_root;
9815         level = btrfs_header_level(root1->node);
9816         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9817                                     root1->node->start, 0, level, 0,
9818                                     root1->nodesize, NULL);
9819         if (ret < 0)
9820                 goto out;
9821         btrfs_init_path(&path);
9822         key.offset = 0;
9823         key.objectid = 0;
9824         key.type = BTRFS_ROOT_ITEM_KEY;
9825         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9826                                         &key, &path, 0, 0);
9827         if (ret < 0)
9828                 goto out;
9829         while(1) {
9830                 leaf = path.nodes[0];
9831                 slot = path.slots[0];
9832                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9833                         ret = btrfs_next_leaf(root, &path);
9834                         if (ret != 0)
9835                                 break;
9836                         leaf = path.nodes[0];
9837                         slot = path.slots[0];
9838                 }
9839                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9840                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9841                         unsigned long offset;
9842                         u64 last_snapshot;
9843
9844                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9845                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9846                         last_snapshot = btrfs_root_last_snapshot(&ri);
9847                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9848                                 level = btrfs_root_level(&ri);
9849                                 level_size = root->nodesize;
9850                                 ret = add_root_item_to_list(&normal_trees,
9851                                                 found_key.objectid,
9852                                                 btrfs_root_bytenr(&ri),
9853                                                 last_snapshot, level,
9854                                                 0, level_size, NULL);
9855                                 if (ret < 0)
9856                                         goto out;
9857                         } else {
9858                                 level = btrfs_root_level(&ri);
9859                                 level_size = root->nodesize;
9860                                 objectid = found_key.objectid;
9861                                 btrfs_disk_key_to_cpu(&found_key,
9862                                                       &ri.drop_progress);
9863                                 ret = add_root_item_to_list(&dropping_trees,
9864                                                 objectid,
9865                                                 btrfs_root_bytenr(&ri),
9866                                                 last_snapshot, level,
9867                                                 ri.drop_level,
9868                                                 level_size, &found_key);
9869                                 if (ret < 0)
9870                                         goto out;
9871                         }
9872                 }
9873                 path.slots[0]++;
9874         }
9875         btrfs_release_path(&path);
9876
9877         /*
9878          * check_block can return -EAGAIN if it fixes something, please keep
9879          * this in mind when dealing with return values from these functions, if
9880          * we get -EAGAIN we want to fall through and restart the loop.
9881          */
9882         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9883                                   &seen, &reada, &nodes, &extent_cache,
9884                                   &chunk_cache, &dev_cache, &block_group_cache,
9885                                   &dev_extent_cache);
9886         if (ret < 0) {
9887                 if (ret == -EAGAIN)
9888                         goto loop;
9889                 goto out;
9890         }
9891         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9892                                   &pending, &seen, &reada, &nodes,
9893                                   &extent_cache, &chunk_cache, &dev_cache,
9894                                   &block_group_cache, &dev_extent_cache);
9895         if (ret < 0) {
9896                 if (ret == -EAGAIN)
9897                         goto loop;
9898                 goto out;
9899         }
9900
9901         ret = check_chunks(&chunk_cache, &block_group_cache,
9902                            &dev_extent_cache, NULL, NULL, NULL, 0);
9903         if (ret) {
9904                 if (ret == -EAGAIN)
9905                         goto loop;
9906                 err = ret;
9907         }
9908
9909         ret = check_extent_refs(root, &extent_cache);
9910         if (ret < 0) {
9911                 if (ret == -EAGAIN)
9912                         goto loop;
9913                 goto out;
9914         }
9915
9916         ret = check_devices(&dev_cache, &dev_extent_cache);
9917         if (ret && err)
9918                 ret = err;
9919
9920 out:
9921         task_stop(ctx.info);
9922         if (repair) {
9923                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9924                 extent_io_tree_cleanup(&excluded_extents);
9925                 root->fs_info->fsck_extent_cache = NULL;
9926                 root->fs_info->free_extent_hook = NULL;
9927                 root->fs_info->corrupt_blocks = NULL;
9928                 root->fs_info->excluded_extents = NULL;
9929         }
9930         free(bits);
9931         free_chunk_cache_tree(&chunk_cache);
9932         free_device_cache_tree(&dev_cache);
9933         free_block_group_tree(&block_group_cache);
9934         free_device_extent_tree(&dev_extent_cache);
9935         free_extent_cache_tree(&seen);
9936         free_extent_cache_tree(&pending);
9937         free_extent_cache_tree(&reada);
9938         free_extent_cache_tree(&nodes);
9939         return ret;
9940 loop:
9941         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9942         free_extent_cache_tree(&seen);
9943         free_extent_cache_tree(&pending);
9944         free_extent_cache_tree(&reada);
9945         free_extent_cache_tree(&nodes);
9946         free_chunk_cache_tree(&chunk_cache);
9947         free_block_group_tree(&block_group_cache);
9948         free_device_cache_tree(&dev_cache);
9949         free_device_extent_tree(&dev_extent_cache);
9950         free_extent_record_cache(root->fs_info, &extent_cache);
9951         free_root_item_list(&normal_trees);
9952         free_root_item_list(&dropping_trees);
9953         extent_io_tree_cleanup(&excluded_extents);
9954         goto again;
9955 }
9956
9957 /*
9958  * Check backrefs of a tree block given by @bytenr or @eb.
9959  *
9960  * @root:       the root containing the @bytenr or @eb
9961  * @eb:         tree block extent buffer, can be NULL
9962  * @bytenr:     bytenr of the tree block to search
9963  * @level:      tree level of the tree block
9964  * @owner:      owner of the tree block
9965  *
9966  * Return >0 for any error found and output error message
9967  * Return 0 for no error found
9968  */
9969 static int check_tree_block_ref(struct btrfs_root *root,
9970                                 struct extent_buffer *eb, u64 bytenr,
9971                                 int level, u64 owner)
9972 {
9973         struct btrfs_key key;
9974         struct btrfs_root *extent_root = root->fs_info->extent_root;
9975         struct btrfs_path path;
9976         struct btrfs_extent_item *ei;
9977         struct btrfs_extent_inline_ref *iref;
9978         struct extent_buffer *leaf;
9979         unsigned long end;
9980         unsigned long ptr;
9981         int slot;
9982         int skinny_level;
9983         int type;
9984         u32 nodesize = root->nodesize;
9985         u32 item_size;
9986         u64 offset;
9987         int tree_reloc_root = 0;
9988         int found_ref = 0;
9989         int err = 0;
9990         int ret;
9991
9992         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9993             btrfs_header_bytenr(root->node) == bytenr)
9994                 tree_reloc_root = 1;
9995
9996         btrfs_init_path(&path);
9997         key.objectid = bytenr;
9998         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
9999                 key.type = BTRFS_METADATA_ITEM_KEY;
10000         else
10001                 key.type = BTRFS_EXTENT_ITEM_KEY;
10002         key.offset = (u64)-1;
10003
10004         /* Search for the backref in extent tree */
10005         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10006         if (ret < 0) {
10007                 err |= BACKREF_MISSING;
10008                 goto out;
10009         }
10010         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10011         if (ret) {
10012                 err |= BACKREF_MISSING;
10013                 goto out;
10014         }
10015
10016         leaf = path.nodes[0];
10017         slot = path.slots[0];
10018         btrfs_item_key_to_cpu(leaf, &key, slot);
10019
10020         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10021
10022         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10023                 skinny_level = (int)key.offset;
10024                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10025         } else {
10026                 struct btrfs_tree_block_info *info;
10027
10028                 info = (struct btrfs_tree_block_info *)(ei + 1);
10029                 skinny_level = btrfs_tree_block_level(leaf, info);
10030                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10031         }
10032
10033         if (eb) {
10034                 u64 header_gen;
10035                 u64 extent_gen;
10036
10037                 if (!(btrfs_extent_flags(leaf, ei) &
10038                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10039                         error(
10040                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10041                                 key.objectid, nodesize,
10042                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10043                         err = BACKREF_MISMATCH;
10044                 }
10045                 header_gen = btrfs_header_generation(eb);
10046                 extent_gen = btrfs_extent_generation(leaf, ei);
10047                 if (header_gen != extent_gen) {
10048                         error(
10049         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10050                                 key.objectid, nodesize, header_gen,
10051                                 extent_gen);
10052                         err = BACKREF_MISMATCH;
10053                 }
10054                 if (level != skinny_level) {
10055                         error(
10056                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10057                                 key.objectid, nodesize, level, skinny_level);
10058                         err = BACKREF_MISMATCH;
10059                 }
10060                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10061                         error(
10062                         "extent[%llu %u] is referred by other roots than %llu",
10063                                 key.objectid, nodesize, root->objectid);
10064                         err = BACKREF_MISMATCH;
10065                 }
10066         }
10067
10068         /*
10069          * Iterate the extent/metadata item to find the exact backref
10070          */
10071         item_size = btrfs_item_size_nr(leaf, slot);
10072         ptr = (unsigned long)iref;
10073         end = (unsigned long)ei + item_size;
10074         while (ptr < end) {
10075                 iref = (struct btrfs_extent_inline_ref *)ptr;
10076                 type = btrfs_extent_inline_ref_type(leaf, iref);
10077                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10078
10079                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10080                         (offset == root->objectid || offset == owner)) {
10081                         found_ref = 1;
10082                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10083                         /*
10084                          * Backref of tree reloc root points to itself, no need
10085                          * to check backref any more.
10086                          */
10087                         if (tree_reloc_root)
10088                                 found_ref = 1;
10089                         else
10090                         /* Check if the backref points to valid referencer */
10091                                 found_ref = !check_tree_block_ref(root, NULL,
10092                                                 offset, level + 1, owner);
10093                 }
10094
10095                 if (found_ref)
10096                         break;
10097                 ptr += btrfs_extent_inline_ref_size(type);
10098         }
10099
10100         /*
10101          * Inlined extent item doesn't have what we need, check
10102          * TREE_BLOCK_REF_KEY
10103          */
10104         if (!found_ref) {
10105                 btrfs_release_path(&path);
10106                 key.objectid = bytenr;
10107                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10108                 key.offset = root->objectid;
10109
10110                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10111                 if (!ret)
10112                         found_ref = 1;
10113         }
10114         if (!found_ref)
10115                 err |= BACKREF_MISSING;
10116 out:
10117         btrfs_release_path(&path);
10118         if (eb && (err & BACKREF_MISSING))
10119                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10120                         bytenr, nodesize, owner, level);
10121         return err;
10122 }
10123
10124 /*
10125  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10126  *
10127  * Return >0 any error found and output error message
10128  * Return 0 for no error found
10129  */
10130 static int check_extent_data_item(struct btrfs_root *root,
10131                                   struct extent_buffer *eb, int slot)
10132 {
10133         struct btrfs_file_extent_item *fi;
10134         struct btrfs_path path;
10135         struct btrfs_root *extent_root = root->fs_info->extent_root;
10136         struct btrfs_key fi_key;
10137         struct btrfs_key dbref_key;
10138         struct extent_buffer *leaf;
10139         struct btrfs_extent_item *ei;
10140         struct btrfs_extent_inline_ref *iref;
10141         struct btrfs_extent_data_ref *dref;
10142         u64 owner;
10143         u64 disk_bytenr;
10144         u64 disk_num_bytes;
10145         u64 extent_num_bytes;
10146         u64 extent_flags;
10147         u32 item_size;
10148         unsigned long end;
10149         unsigned long ptr;
10150         int type;
10151         u64 ref_root;
10152         int found_dbackref = 0;
10153         int err = 0;
10154         int ret;
10155
10156         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10157         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10158
10159         /* Nothing to check for hole and inline data extents */
10160         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10161             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10162                 return 0;
10163
10164         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10165         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10166         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10167
10168         /* Check unaligned disk_num_bytes and num_bytes */
10169         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10170                 error(
10171 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10172                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10173                         root->sectorsize);
10174                 err |= BYTES_UNALIGNED;
10175         } else {
10176                 data_bytes_allocated += disk_num_bytes;
10177         }
10178         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10179                 error(
10180 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10181                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10182                         root->sectorsize);
10183                 err |= BYTES_UNALIGNED;
10184         } else {
10185                 data_bytes_referenced += extent_num_bytes;
10186         }
10187         owner = btrfs_header_owner(eb);
10188
10189         /* Check the extent item of the file extent in extent tree */
10190         btrfs_init_path(&path);
10191         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10192         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10193         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10194
10195         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10196         if (ret) {
10197                 err |= BACKREF_MISSING;
10198                 goto error;
10199         }
10200
10201         leaf = path.nodes[0];
10202         slot = path.slots[0];
10203         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10204
10205         extent_flags = btrfs_extent_flags(leaf, ei);
10206
10207         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10208                 error(
10209                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10210                     disk_bytenr, disk_num_bytes,
10211                     BTRFS_EXTENT_FLAG_DATA);
10212                 err |= BACKREF_MISMATCH;
10213         }
10214
10215         /* Check data backref inside that extent item */
10216         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10217         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10218         ptr = (unsigned long)iref;
10219         end = (unsigned long)ei + item_size;
10220         while (ptr < end) {
10221                 iref = (struct btrfs_extent_inline_ref *)ptr;
10222                 type = btrfs_extent_inline_ref_type(leaf, iref);
10223                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10224
10225                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10226                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10227                         if (ref_root == owner || ref_root == root->objectid)
10228                                 found_dbackref = 1;
10229                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10230                         found_dbackref = !check_tree_block_ref(root, NULL,
10231                                 btrfs_extent_inline_ref_offset(leaf, iref),
10232                                 0, owner);
10233                 }
10234
10235                 if (found_dbackref)
10236                         break;
10237                 ptr += btrfs_extent_inline_ref_size(type);
10238         }
10239
10240         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10241         if (!found_dbackref) {
10242                 btrfs_release_path(&path);
10243
10244                 btrfs_init_path(&path);
10245                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10246                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10247                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10248                                 fi_key.objectid, fi_key.offset);
10249
10250                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10251                                         &dbref_key, &path, 0, 0);
10252                 if (!ret)
10253                         found_dbackref = 1;
10254         }
10255
10256         if (!found_dbackref)
10257                 err |= BACKREF_MISSING;
10258 error:
10259         btrfs_release_path(&path);
10260         if (err & BACKREF_MISSING) {
10261                 error("data extent[%llu %llu] backref lost",
10262                       disk_bytenr, disk_num_bytes);
10263         }
10264         return err;
10265 }
10266
10267 /*
10268  * Get real tree block level for the case like shared block
10269  * Return >= 0 as tree level
10270  * Return <0 for error
10271  */
10272 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10273 {
10274         struct extent_buffer *eb;
10275         struct btrfs_path path;
10276         struct btrfs_key key;
10277         struct btrfs_extent_item *ei;
10278         u64 flags;
10279         u64 transid;
10280         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10281         u8 backref_level;
10282         u8 header_level;
10283         int ret;
10284
10285         /* Search extent tree for extent generation and level */
10286         key.objectid = bytenr;
10287         key.type = BTRFS_METADATA_ITEM_KEY;
10288         key.offset = (u64)-1;
10289
10290         btrfs_init_path(&path);
10291         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10292         if (ret < 0)
10293                 goto release_out;
10294         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10295         if (ret < 0)
10296                 goto release_out;
10297         if (ret > 0) {
10298                 ret = -ENOENT;
10299                 goto release_out;
10300         }
10301
10302         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10303         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10304                             struct btrfs_extent_item);
10305         flags = btrfs_extent_flags(path.nodes[0], ei);
10306         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10307                 ret = -ENOENT;
10308                 goto release_out;
10309         }
10310
10311         /* Get transid for later read_tree_block() check */
10312         transid = btrfs_extent_generation(path.nodes[0], ei);
10313
10314         /* Get backref level as one source */
10315         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10316                 backref_level = key.offset;
10317         } else {
10318                 struct btrfs_tree_block_info *info;
10319
10320                 info = (struct btrfs_tree_block_info *)(ei + 1);
10321                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10322         }
10323         btrfs_release_path(&path);
10324
10325         /* Get level from tree block as an alternative source */
10326         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10327         if (!extent_buffer_uptodate(eb)) {
10328                 free_extent_buffer(eb);
10329                 return -EIO;
10330         }
10331         header_level = btrfs_header_level(eb);
10332         free_extent_buffer(eb);
10333
10334         if (header_level != backref_level)
10335                 return -EIO;
10336         return header_level;
10337
10338 release_out:
10339         btrfs_release_path(&path);
10340         return ret;
10341 }
10342
10343 /*
10344  * Check if a tree block backref is valid (points to a valid tree block)
10345  * if level == -1, level will be resolved
10346  * Return >0 for any error found and print error message
10347  */
10348 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10349                                     u64 bytenr, int level)
10350 {
10351         struct btrfs_root *root;
10352         struct btrfs_key key;
10353         struct btrfs_path path;
10354         struct extent_buffer *eb;
10355         struct extent_buffer *node;
10356         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10357         int err = 0;
10358         int ret;
10359
10360         /* Query level for level == -1 special case */
10361         if (level == -1)
10362                 level = query_tree_block_level(fs_info, bytenr);
10363         if (level < 0) {
10364                 err |= REFERENCER_MISSING;
10365                 goto out;
10366         }
10367
10368         key.objectid = root_id;
10369         key.type = BTRFS_ROOT_ITEM_KEY;
10370         key.offset = (u64)-1;
10371
10372         root = btrfs_read_fs_root(fs_info, &key);
10373         if (IS_ERR(root)) {
10374                 err |= REFERENCER_MISSING;
10375                 goto out;
10376         }
10377
10378         /* Read out the tree block to get item/node key */
10379         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10380         if (!extent_buffer_uptodate(eb)) {
10381                 err |= REFERENCER_MISSING;
10382                 free_extent_buffer(eb);
10383                 goto out;
10384         }
10385
10386         /* Empty tree, no need to check key */
10387         if (!btrfs_header_nritems(eb) && !level) {
10388                 free_extent_buffer(eb);
10389                 goto out;
10390         }
10391
10392         if (level)
10393                 btrfs_node_key_to_cpu(eb, &key, 0);
10394         else
10395                 btrfs_item_key_to_cpu(eb, &key, 0);
10396
10397         free_extent_buffer(eb);
10398
10399         btrfs_init_path(&path);
10400         path.lowest_level = level;
10401         /* Search with the first key, to ensure we can reach it */
10402         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10403         if (ret < 0) {
10404                 err |= REFERENCER_MISSING;
10405                 goto release_out;
10406         }
10407
10408         node = path.nodes[level];
10409         if (btrfs_header_bytenr(node) != bytenr) {
10410                 error(
10411         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10412                         bytenr, nodesize, bytenr,
10413                         btrfs_header_bytenr(node));
10414                 err |= REFERENCER_MISMATCH;
10415         }
10416         if (btrfs_header_level(node) != level) {
10417                 error(
10418         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10419                         bytenr, nodesize, level,
10420                         btrfs_header_level(node));
10421                 err |= REFERENCER_MISMATCH;
10422         }
10423
10424 release_out:
10425         btrfs_release_path(&path);
10426 out:
10427         if (err & REFERENCER_MISSING) {
10428                 if (level < 0)
10429                         error("extent [%llu %d] lost referencer (owner: %llu)",
10430                                 bytenr, nodesize, root_id);
10431                 else
10432                         error(
10433                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10434                                 bytenr, nodesize, root_id, level);
10435         }
10436
10437         return err;
10438 }
10439
10440 /*
10441  * Check if tree block @eb is tree reloc root.
10442  * Return 0 if it's not or any problem happens
10443  * Return 1 if it's a tree reloc root
10444  */
10445 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10446                                  struct extent_buffer *eb)
10447 {
10448         struct btrfs_root *tree_reloc_root;
10449         struct btrfs_key key;
10450         u64 bytenr = btrfs_header_bytenr(eb);
10451         u64 owner = btrfs_header_owner(eb);
10452         int ret = 0;
10453
10454         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10455         key.offset = owner;
10456         key.type = BTRFS_ROOT_ITEM_KEY;
10457
10458         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10459         if (IS_ERR(tree_reloc_root))
10460                 return 0;
10461
10462         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10463                 ret = 1;
10464         btrfs_free_fs_root(tree_reloc_root);
10465         return ret;
10466 }
10467
10468 /*
10469  * Check referencer for shared block backref
10470  * If level == -1, this function will resolve the level.
10471  */
10472 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10473                                      u64 parent, u64 bytenr, int level)
10474 {
10475         struct extent_buffer *eb;
10476         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10477         u32 nr;
10478         int found_parent = 0;
10479         int i;
10480
10481         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10482         if (!extent_buffer_uptodate(eb))
10483                 goto out;
10484
10485         if (level == -1)
10486                 level = query_tree_block_level(fs_info, bytenr);
10487         if (level < 0)
10488                 goto out;
10489
10490         /* It's possible it's a tree reloc root */
10491         if (parent == bytenr) {
10492                 if (is_tree_reloc_root(fs_info, eb))
10493                         found_parent = 1;
10494                 goto out;
10495         }
10496
10497         if (level + 1 != btrfs_header_level(eb))
10498                 goto out;
10499
10500         nr = btrfs_header_nritems(eb);
10501         for (i = 0; i < nr; i++) {
10502                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10503                         found_parent = 1;
10504                         break;
10505                 }
10506         }
10507 out:
10508         free_extent_buffer(eb);
10509         if (!found_parent) {
10510                 error(
10511         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10512                         bytenr, nodesize, parent, level);
10513                 return REFERENCER_MISSING;
10514         }
10515         return 0;
10516 }
10517
10518 /*
10519  * Check referencer for normal (inlined) data ref
10520  * If len == 0, it will be resolved by searching in extent tree
10521  */
10522 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10523                                      u64 root_id, u64 objectid, u64 offset,
10524                                      u64 bytenr, u64 len, u32 count)
10525 {
10526         struct btrfs_root *root;
10527         struct btrfs_root *extent_root = fs_info->extent_root;
10528         struct btrfs_key key;
10529         struct btrfs_path path;
10530         struct extent_buffer *leaf;
10531         struct btrfs_file_extent_item *fi;
10532         u32 found_count = 0;
10533         int slot;
10534         int ret = 0;
10535
10536         if (!len) {
10537                 key.objectid = bytenr;
10538                 key.type = BTRFS_EXTENT_ITEM_KEY;
10539                 key.offset = (u64)-1;
10540
10541                 btrfs_init_path(&path);
10542                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10543                 if (ret < 0)
10544                         goto out;
10545                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10546                 if (ret)
10547                         goto out;
10548                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10549                 if (key.objectid != bytenr ||
10550                     key.type != BTRFS_EXTENT_ITEM_KEY)
10551                         goto out;
10552                 len = key.offset;
10553                 btrfs_release_path(&path);
10554         }
10555         key.objectid = root_id;
10556         key.type = BTRFS_ROOT_ITEM_KEY;
10557         key.offset = (u64)-1;
10558         btrfs_init_path(&path);
10559
10560         root = btrfs_read_fs_root(fs_info, &key);
10561         if (IS_ERR(root))
10562                 goto out;
10563
10564         key.objectid = objectid;
10565         key.type = BTRFS_EXTENT_DATA_KEY;
10566         /*
10567          * It can be nasty as data backref offset is
10568          * file offset - file extent offset, which is smaller or
10569          * equal to original backref offset.  The only special case is
10570          * overflow.  So we need to special check and do further search.
10571          */
10572         key.offset = offset & (1ULL << 63) ? 0 : offset;
10573
10574         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10575         if (ret < 0)
10576                 goto out;
10577
10578         /*
10579          * Search afterwards to get correct one
10580          * NOTE: As we must do a comprehensive check on the data backref to
10581          * make sure the dref count also matches, we must iterate all file
10582          * extents for that inode.
10583          */
10584         while (1) {
10585                 leaf = path.nodes[0];
10586                 slot = path.slots[0];
10587
10588                 btrfs_item_key_to_cpu(leaf, &key, slot);
10589                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10590                         break;
10591                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10592                 /*
10593                  * Except normal disk bytenr and disk num bytes, we still
10594                  * need to do extra check on dbackref offset as
10595                  * dbackref offset = file_offset - file_extent_offset
10596                  */
10597                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10598                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10599                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10600                     offset)
10601                         found_count++;
10602
10603                 ret = btrfs_next_item(root, &path);
10604                 if (ret)
10605                         break;
10606         }
10607 out:
10608         btrfs_release_path(&path);
10609         if (found_count != count) {
10610                 error(
10611 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10612                         bytenr, len, root_id, objectid, offset, count, found_count);
10613                 return REFERENCER_MISSING;
10614         }
10615         return 0;
10616 }
10617
10618 /*
10619  * Check if the referencer of a shared data backref exists
10620  */
10621 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10622                                      u64 parent, u64 bytenr)
10623 {
10624         struct extent_buffer *eb;
10625         struct btrfs_key key;
10626         struct btrfs_file_extent_item *fi;
10627         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10628         u32 nr;
10629         int found_parent = 0;
10630         int i;
10631
10632         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10633         if (!extent_buffer_uptodate(eb))
10634                 goto out;
10635
10636         nr = btrfs_header_nritems(eb);
10637         for (i = 0; i < nr; i++) {
10638                 btrfs_item_key_to_cpu(eb, &key, i);
10639                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10640                         continue;
10641
10642                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10643                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10644                         continue;
10645
10646                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10647                         found_parent = 1;
10648                         break;
10649                 }
10650         }
10651
10652 out:
10653         free_extent_buffer(eb);
10654         if (!found_parent) {
10655                 error("shared extent %llu referencer lost (parent: %llu)",
10656                         bytenr, parent);
10657                 return REFERENCER_MISSING;
10658         }
10659         return 0;
10660 }
10661
10662 /*
10663  * This function will check a given extent item, including its backref and
10664  * itself (like crossing stripe boundary and type)
10665  *
10666  * Since we don't use extent_record anymore, introduce new error bit
10667  */
10668 static int check_extent_item(struct btrfs_fs_info *fs_info,
10669                              struct extent_buffer *eb, int slot)
10670 {
10671         struct btrfs_extent_item *ei;
10672         struct btrfs_extent_inline_ref *iref;
10673         struct btrfs_extent_data_ref *dref;
10674         unsigned long end;
10675         unsigned long ptr;
10676         int type;
10677         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10678         u32 item_size = btrfs_item_size_nr(eb, slot);
10679         u64 flags;
10680         u64 offset;
10681         int metadata = 0;
10682         int level;
10683         struct btrfs_key key;
10684         int ret;
10685         int err = 0;
10686
10687         btrfs_item_key_to_cpu(eb, &key, slot);
10688         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10689                 bytes_used += key.offset;
10690         else
10691                 bytes_used += nodesize;
10692
10693         if (item_size < sizeof(*ei)) {
10694                 /*
10695                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10696                  * old thing when on disk format is still un-determined.
10697                  * No need to care about it anymore
10698                  */
10699                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10700                 return -ENOTTY;
10701         }
10702
10703         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10704         flags = btrfs_extent_flags(eb, ei);
10705
10706         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10707                 metadata = 1;
10708         if (metadata && check_crossing_stripes(global_info, key.objectid,
10709                                                eb->len)) {
10710                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10711                       key.objectid, key.objectid + nodesize);
10712                 err |= CROSSING_STRIPE_BOUNDARY;
10713         }
10714
10715         ptr = (unsigned long)(ei + 1);
10716
10717         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10718                 /* Old EXTENT_ITEM metadata */
10719                 struct btrfs_tree_block_info *info;
10720
10721                 info = (struct btrfs_tree_block_info *)ptr;
10722                 level = btrfs_tree_block_level(eb, info);
10723                 ptr += sizeof(struct btrfs_tree_block_info);
10724         } else {
10725                 /* New METADATA_ITEM */
10726                 level = key.offset;
10727         }
10728         end = (unsigned long)ei + item_size;
10729
10730         if (ptr >= end) {
10731                 err |= ITEM_SIZE_MISMATCH;
10732                 goto out;
10733         }
10734
10735         /* Now check every backref in this extent item */
10736 next:
10737         iref = (struct btrfs_extent_inline_ref *)ptr;
10738         type = btrfs_extent_inline_ref_type(eb, iref);
10739         offset = btrfs_extent_inline_ref_offset(eb, iref);
10740         switch (type) {
10741         case BTRFS_TREE_BLOCK_REF_KEY:
10742                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10743                                                level);
10744                 err |= ret;
10745                 break;
10746         case BTRFS_SHARED_BLOCK_REF_KEY:
10747                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10748                                                  level);
10749                 err |= ret;
10750                 break;
10751         case BTRFS_EXTENT_DATA_REF_KEY:
10752                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10753                 ret = check_extent_data_backref(fs_info,
10754                                 btrfs_extent_data_ref_root(eb, dref),
10755                                 btrfs_extent_data_ref_objectid(eb, dref),
10756                                 btrfs_extent_data_ref_offset(eb, dref),
10757                                 key.objectid, key.offset,
10758                                 btrfs_extent_data_ref_count(eb, dref));
10759                 err |= ret;
10760                 break;
10761         case BTRFS_SHARED_DATA_REF_KEY:
10762                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10763                 err |= ret;
10764                 break;
10765         default:
10766                 error("extent[%llu %d %llu] has unknown ref type: %d",
10767                         key.objectid, key.type, key.offset, type);
10768                 err |= UNKNOWN_TYPE;
10769                 goto out;
10770         }
10771
10772         ptr += btrfs_extent_inline_ref_size(type);
10773         if (ptr < end)
10774                 goto next;
10775
10776 out:
10777         return err;
10778 }
10779
10780 /*
10781  * Check if a dev extent item is referred correctly by its chunk
10782  */
10783 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10784                                  struct extent_buffer *eb, int slot)
10785 {
10786         struct btrfs_root *chunk_root = fs_info->chunk_root;
10787         struct btrfs_dev_extent *ptr;
10788         struct btrfs_path path;
10789         struct btrfs_key chunk_key;
10790         struct btrfs_key devext_key;
10791         struct btrfs_chunk *chunk;
10792         struct extent_buffer *l;
10793         int num_stripes;
10794         u64 length;
10795         int i;
10796         int found_chunk = 0;
10797         int ret;
10798
10799         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10800         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10801         length = btrfs_dev_extent_length(eb, ptr);
10802
10803         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10804         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10805         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10806
10807         btrfs_init_path(&path);
10808         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10809         if (ret)
10810                 goto out;
10811
10812         l = path.nodes[0];
10813         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10814         if (btrfs_chunk_length(l, chunk) != length)
10815                 goto out;
10816
10817         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10818         for (i = 0; i < num_stripes; i++) {
10819                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10820                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10821
10822                 if (devid == devext_key.objectid &&
10823                     offset == devext_key.offset) {
10824                         found_chunk = 1;
10825                         break;
10826                 }
10827         }
10828 out:
10829         btrfs_release_path(&path);
10830         if (!found_chunk) {
10831                 error(
10832                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10833                         devext_key.objectid, devext_key.offset, length);
10834                 return REFERENCER_MISSING;
10835         }
10836         return 0;
10837 }
10838
10839 /*
10840  * Check if the used space is correct with the dev item
10841  */
10842 static int check_dev_item(struct btrfs_fs_info *fs_info,
10843                           struct extent_buffer *eb, int slot)
10844 {
10845         struct btrfs_root *dev_root = fs_info->dev_root;
10846         struct btrfs_dev_item *dev_item;
10847         struct btrfs_path path;
10848         struct btrfs_key key;
10849         struct btrfs_dev_extent *ptr;
10850         u64 dev_id;
10851         u64 used;
10852         u64 total = 0;
10853         int ret;
10854
10855         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10856         dev_id = btrfs_device_id(eb, dev_item);
10857         used = btrfs_device_bytes_used(eb, dev_item);
10858
10859         key.objectid = dev_id;
10860         key.type = BTRFS_DEV_EXTENT_KEY;
10861         key.offset = 0;
10862
10863         btrfs_init_path(&path);
10864         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10865         if (ret < 0) {
10866                 btrfs_item_key_to_cpu(eb, &key, slot);
10867                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10868                         key.objectid, key.type, key.offset);
10869                 btrfs_release_path(&path);
10870                 return REFERENCER_MISSING;
10871         }
10872
10873         /* Iterate dev_extents to calculate the used space of a device */
10874         while (1) {
10875                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10876
10877                 if (key.objectid > dev_id)
10878                         break;
10879                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10880                         goto next;
10881
10882                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10883                                      struct btrfs_dev_extent);
10884                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10885 next:
10886                 ret = btrfs_next_item(dev_root, &path);
10887                 if (ret)
10888                         break;
10889         }
10890         btrfs_release_path(&path);
10891
10892         if (used != total) {
10893                 btrfs_item_key_to_cpu(eb, &key, slot);
10894                 error(
10895 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10896                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10897                         BTRFS_DEV_EXTENT_KEY, dev_id);
10898                 return ACCOUNTING_MISMATCH;
10899         }
10900         return 0;
10901 }
10902
10903 /*
10904  * Check a block group item with its referener (chunk) and its used space
10905  * with extent/metadata item
10906  */
10907 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10908                                   struct extent_buffer *eb, int slot)
10909 {
10910         struct btrfs_root *extent_root = fs_info->extent_root;
10911         struct btrfs_root *chunk_root = fs_info->chunk_root;
10912         struct btrfs_block_group_item *bi;
10913         struct btrfs_block_group_item bg_item;
10914         struct btrfs_path path;
10915         struct btrfs_key bg_key;
10916         struct btrfs_key chunk_key;
10917         struct btrfs_key extent_key;
10918         struct btrfs_chunk *chunk;
10919         struct extent_buffer *leaf;
10920         struct btrfs_extent_item *ei;
10921         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10922         u64 flags;
10923         u64 bg_flags;
10924         u64 used;
10925         u64 total = 0;
10926         int ret;
10927         int err = 0;
10928
10929         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10930         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10931         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10932         used = btrfs_block_group_used(&bg_item);
10933         bg_flags = btrfs_block_group_flags(&bg_item);
10934
10935         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10936         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10937         chunk_key.offset = bg_key.objectid;
10938
10939         btrfs_init_path(&path);
10940         /* Search for the referencer chunk */
10941         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10942         if (ret) {
10943                 error(
10944                 "block group[%llu %llu] did not find the related chunk item",
10945                         bg_key.objectid, bg_key.offset);
10946                 err |= REFERENCER_MISSING;
10947         } else {
10948                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10949                                         struct btrfs_chunk);
10950                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10951                                                 bg_key.offset) {
10952                         error(
10953         "block group[%llu %llu] related chunk item length does not match",
10954                                 bg_key.objectid, bg_key.offset);
10955                         err |= REFERENCER_MISMATCH;
10956                 }
10957         }
10958         btrfs_release_path(&path);
10959
10960         /* Search from the block group bytenr */
10961         extent_key.objectid = bg_key.objectid;
10962         extent_key.type = 0;
10963         extent_key.offset = 0;
10964
10965         btrfs_init_path(&path);
10966         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10967         if (ret < 0)
10968                 goto out;
10969
10970         /* Iterate extent tree to account used space */
10971         while (1) {
10972                 leaf = path.nodes[0];
10973                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10974                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10975                         break;
10976
10977                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10978                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10979                         goto next;
10980                 if (extent_key.objectid < bg_key.objectid)
10981                         goto next;
10982
10983                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10984                         total += nodesize;
10985                 else
10986                         total += extent_key.offset;
10987
10988                 ei = btrfs_item_ptr(leaf, path.slots[0],
10989                                     struct btrfs_extent_item);
10990                 flags = btrfs_extent_flags(leaf, ei);
10991                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10992                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10993                                 error(
10994                         "bad extent[%llu, %llu) type mismatch with chunk",
10995                                         extent_key.objectid,
10996                                         extent_key.objectid + extent_key.offset);
10997                                 err |= CHUNK_TYPE_MISMATCH;
10998                         }
10999                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11000                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11001                                     BTRFS_BLOCK_GROUP_METADATA))) {
11002                                 error(
11003                         "bad extent[%llu, %llu) type mismatch with chunk",
11004                                         extent_key.objectid,
11005                                         extent_key.objectid + nodesize);
11006                                 err |= CHUNK_TYPE_MISMATCH;
11007                         }
11008                 }
11009 next:
11010                 ret = btrfs_next_item(extent_root, &path);
11011                 if (ret)
11012                         break;
11013         }
11014
11015 out:
11016         btrfs_release_path(&path);
11017
11018         if (total != used) {
11019                 error(
11020                 "block group[%llu %llu] used %llu but extent items used %llu",
11021                         bg_key.objectid, bg_key.offset, used, total);
11022                 err |= ACCOUNTING_MISMATCH;
11023         }
11024         return err;
11025 }
11026
11027 /*
11028  * Check a chunk item.
11029  * Including checking all referred dev_extents and block group
11030  */
11031 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11032                             struct extent_buffer *eb, int slot)
11033 {
11034         struct btrfs_root *extent_root = fs_info->extent_root;
11035         struct btrfs_root *dev_root = fs_info->dev_root;
11036         struct btrfs_path path;
11037         struct btrfs_key chunk_key;
11038         struct btrfs_key bg_key;
11039         struct btrfs_key devext_key;
11040         struct btrfs_chunk *chunk;
11041         struct extent_buffer *leaf;
11042         struct btrfs_block_group_item *bi;
11043         struct btrfs_block_group_item bg_item;
11044         struct btrfs_dev_extent *ptr;
11045         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11046         u64 length;
11047         u64 chunk_end;
11048         u64 type;
11049         u64 profile;
11050         int num_stripes;
11051         u64 offset;
11052         u64 objectid;
11053         int i;
11054         int ret;
11055         int err = 0;
11056
11057         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11058         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11059         length = btrfs_chunk_length(eb, chunk);
11060         chunk_end = chunk_key.offset + length;
11061         if (!IS_ALIGNED(length, sectorsize)) {
11062                 error("chunk[%llu %llu) not aligned to %u",
11063                         chunk_key.offset, chunk_end, sectorsize);
11064                 err |= BYTES_UNALIGNED;
11065                 goto out;
11066         }
11067
11068         type = btrfs_chunk_type(eb, chunk);
11069         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11070         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11071                 error("chunk[%llu %llu) has no chunk type",
11072                         chunk_key.offset, chunk_end);
11073                 err |= UNKNOWN_TYPE;
11074         }
11075         if (profile && (profile & (profile - 1))) {
11076                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11077                         chunk_key.offset, chunk_end, profile);
11078                 err |= UNKNOWN_TYPE;
11079         }
11080
11081         bg_key.objectid = chunk_key.offset;
11082         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11083         bg_key.offset = length;
11084
11085         btrfs_init_path(&path);
11086         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11087         if (ret) {
11088                 error(
11089                 "chunk[%llu %llu) did not find the related block group item",
11090                         chunk_key.offset, chunk_end);
11091                 err |= REFERENCER_MISSING;
11092         } else{
11093                 leaf = path.nodes[0];
11094                 bi = btrfs_item_ptr(leaf, path.slots[0],
11095                                     struct btrfs_block_group_item);
11096                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11097                                    sizeof(bg_item));
11098                 if (btrfs_block_group_flags(&bg_item) != type) {
11099                         error(
11100 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11101                                 chunk_key.offset, chunk_end, type,
11102                                 btrfs_block_group_flags(&bg_item));
11103                         err |= REFERENCER_MISSING;
11104                 }
11105         }
11106
11107         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11108         for (i = 0; i < num_stripes; i++) {
11109                 btrfs_release_path(&path);
11110                 btrfs_init_path(&path);
11111                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11112                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11113                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11114
11115                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11116                                         0, 0);
11117                 if (ret)
11118                         goto not_match_dev;
11119
11120                 leaf = path.nodes[0];
11121                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11122                                      struct btrfs_dev_extent);
11123                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11124                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11125                 if (objectid != chunk_key.objectid ||
11126                     offset != chunk_key.offset ||
11127                     btrfs_dev_extent_length(leaf, ptr) != length)
11128                         goto not_match_dev;
11129                 continue;
11130 not_match_dev:
11131                 err |= BACKREF_MISSING;
11132                 error(
11133                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11134                         chunk_key.objectid, chunk_end, i);
11135                 continue;
11136         }
11137         btrfs_release_path(&path);
11138 out:
11139         return err;
11140 }
11141
11142 /*
11143  * Main entry function to check known items and update related accounting info
11144  */
11145 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11146 {
11147         struct btrfs_fs_info *fs_info = root->fs_info;
11148         struct btrfs_key key;
11149         int slot = 0;
11150         int type;
11151         struct btrfs_extent_data_ref *dref;
11152         int ret;
11153         int err = 0;
11154
11155 next:
11156         btrfs_item_key_to_cpu(eb, &key, slot);
11157         type = key.type;
11158
11159         switch (type) {
11160         case BTRFS_EXTENT_DATA_KEY:
11161                 ret = check_extent_data_item(root, eb, slot);
11162                 err |= ret;
11163                 break;
11164         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11165                 ret = check_block_group_item(fs_info, eb, slot);
11166                 err |= ret;
11167                 break;
11168         case BTRFS_DEV_ITEM_KEY:
11169                 ret = check_dev_item(fs_info, eb, slot);
11170                 err |= ret;
11171                 break;
11172         case BTRFS_CHUNK_ITEM_KEY:
11173                 ret = check_chunk_item(fs_info, eb, slot);
11174                 err |= ret;
11175                 break;
11176         case BTRFS_DEV_EXTENT_KEY:
11177                 ret = check_dev_extent_item(fs_info, eb, slot);
11178                 err |= ret;
11179                 break;
11180         case BTRFS_EXTENT_ITEM_KEY:
11181         case BTRFS_METADATA_ITEM_KEY:
11182                 ret = check_extent_item(fs_info, eb, slot);
11183                 err |= ret;
11184                 break;
11185         case BTRFS_EXTENT_CSUM_KEY:
11186                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11187                 break;
11188         case BTRFS_TREE_BLOCK_REF_KEY:
11189                 ret = check_tree_block_backref(fs_info, key.offset,
11190                                                key.objectid, -1);
11191                 err |= ret;
11192                 break;
11193         case BTRFS_EXTENT_DATA_REF_KEY:
11194                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11195                 ret = check_extent_data_backref(fs_info,
11196                                 btrfs_extent_data_ref_root(eb, dref),
11197                                 btrfs_extent_data_ref_objectid(eb, dref),
11198                                 btrfs_extent_data_ref_offset(eb, dref),
11199                                 key.objectid, 0,
11200                                 btrfs_extent_data_ref_count(eb, dref));
11201                 err |= ret;
11202                 break;
11203         case BTRFS_SHARED_BLOCK_REF_KEY:
11204                 ret = check_shared_block_backref(fs_info, key.offset,
11205                                                  key.objectid, -1);
11206                 err |= ret;
11207                 break;
11208         case BTRFS_SHARED_DATA_REF_KEY:
11209                 ret = check_shared_data_backref(fs_info, key.offset,
11210                                                 key.objectid);
11211                 err |= ret;
11212                 break;
11213         default:
11214                 break;
11215         }
11216
11217         if (++slot < btrfs_header_nritems(eb))
11218                 goto next;
11219
11220         return err;
11221 }
11222
/*
 * Helper function for later fs/subvol tree check.  It determines if a tree
 * block should be checked.
 * This function ensures that a fs/subvolume tree block is only checked
 * through its direct referencer with the lowest rootid.
 *
 * Backref check at extent tree would detect errors like missing subvolume
 * tree, so we can do aggressive check to reduce duplicated checks.
 */
11232 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11233 {
11234         struct btrfs_root *extent_root = root->fs_info->extent_root;
11235         struct btrfs_key key;
11236         struct btrfs_path path;
11237         struct extent_buffer *leaf;
11238         int slot;
11239         struct btrfs_extent_item *ei;
11240         unsigned long ptr;
11241         unsigned long end;
11242         int type;
11243         u32 item_size;
11244         u64 offset;
11245         struct btrfs_extent_inline_ref *iref;
11246         int ret;
11247
11248         btrfs_init_path(&path);
11249         key.objectid = btrfs_header_bytenr(eb);
11250         key.type = BTRFS_METADATA_ITEM_KEY;
11251         key.offset = (u64)-1;
11252
11253         /*
11254          * Any failure in backref resolving means we can't determine
11255          * whom the tree block belongs to.
11256          * So in that case, we need to check that tree block
11257          */
11258         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11259         if (ret < 0)
11260                 goto need_check;
11261
11262         ret = btrfs_previous_extent_item(extent_root, &path,
11263                                          btrfs_header_bytenr(eb));
11264         if (ret)
11265                 goto need_check;
11266
11267         leaf = path.nodes[0];
11268         slot = path.slots[0];
11269         btrfs_item_key_to_cpu(leaf, &key, slot);
11270         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11271
11272         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11273                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11274         } else {
11275                 struct btrfs_tree_block_info *info;
11276
11277                 info = (struct btrfs_tree_block_info *)(ei + 1);
11278                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11279         }
11280
11281         item_size = btrfs_item_size_nr(leaf, slot);
11282         ptr = (unsigned long)iref;
11283         end = (unsigned long)ei + item_size;
11284         while (ptr < end) {
11285                 iref = (struct btrfs_extent_inline_ref *)ptr;
11286                 type = btrfs_extent_inline_ref_type(leaf, iref);
11287                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11288
11289                 /*
11290                  * We only check the tree block if current root is
11291                  * the lowest referencer of it.
11292                  */
11293                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11294                     offset < root->objectid) {
11295                         btrfs_release_path(&path);
11296                         return 0;
11297                 }
11298
11299                 ptr += btrfs_extent_inline_ref_size(type);
11300         }
11301         /*
11302          * Normally we should also check keyed tree block ref, but that may be
11303          * very time consuming.  Inlined ref should already make us skip a lot
11304          * of refs now.  So skip search keyed tree block ref.
11305          */
11306
11307 need_check:
11308         btrfs_release_path(&path);
11309         return 1;
11310 }
11311
11312 /*
11313  * Traversal function for tree block. We will do:
11314  * 1) Skip shared fs/subvolume tree blocks
11315  * 2) Update related bytes accounting
11316  * 3) Pre-order traversal
11317  */
11318 static int traverse_tree_block(struct btrfs_root *root,
11319                                 struct extent_buffer *node)
11320 {
11321         struct extent_buffer *eb;
11322         struct btrfs_key key;
11323         struct btrfs_key drop_key;
11324         int level;
11325         u64 nr;
11326         int i;
11327         int err = 0;
11328         int ret;
11329
11330         /*
11331          * Skip shared fs/subvolume tree block, in that case they will
11332          * be checked by referencer with lowest rootid
11333          */
11334         if (is_fstree(root->objectid) && !should_check(root, node))
11335                 return 0;
11336
11337         /* Update bytes accounting */
11338         total_btree_bytes += node->len;
11339         if (fs_root_objectid(btrfs_header_owner(node)))
11340                 total_fs_tree_bytes += node->len;
11341         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11342                 total_extent_tree_bytes += node->len;
11343         if (!found_old_backref &&
11344             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11345             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11346             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11347                 found_old_backref = 1;
11348
11349         /* pre-order tranversal, check itself first */
11350         level = btrfs_header_level(node);
11351         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11352                                    btrfs_header_level(node),
11353                                    btrfs_header_owner(node));
11354         err |= ret;
11355         if (err)
11356                 error(
11357         "check %s failed root %llu bytenr %llu level %d, force continue check",
11358                         level ? "node":"leaf", root->objectid,
11359                         btrfs_header_bytenr(node), btrfs_header_level(node));
11360
11361         if (!level) {
11362                 btree_space_waste += btrfs_leaf_free_space(root, node);
11363                 ret = check_leaf_items(root, node);
11364                 err |= ret;
11365                 return err;
11366         }
11367
11368         nr = btrfs_header_nritems(node);
11369         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11370         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11371                 sizeof(struct btrfs_key_ptr);
11372
11373         /* Then check all its children */
11374         for (i = 0; i < nr; i++) {
11375                 u64 blocknr = btrfs_node_blockptr(node, i);
11376
11377                 btrfs_node_key_to_cpu(node, &key, i);
11378                 if (level == root->root_item.drop_level &&
11379                     is_dropped_key(&key, &drop_key))
11380                         continue;
11381
11382                 /*
11383                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11384                  * to call the function itself.
11385                  */
11386                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11387                 if (extent_buffer_uptodate(eb)) {
11388                         ret = traverse_tree_block(root, eb);
11389                         err |= ret;
11390                 }
11391                 free_extent_buffer(eb);
11392         }
11393
11394         return err;
11395 }
11396
11397 /*
11398  * Low memory usage version check_chunks_and_extents.
11399  */
11400 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11401 {
11402         struct btrfs_path path;
11403         struct btrfs_key key;
11404         struct btrfs_root *root1;
11405         struct btrfs_root *cur_root;
11406         int err = 0;
11407         int ret;
11408
11409         root1 = root->fs_info->chunk_root;
11410         ret = traverse_tree_block(root1, root1->node);
11411         err |= ret;
11412
11413         root1 = root->fs_info->tree_root;
11414         ret = traverse_tree_block(root1, root1->node);
11415         err |= ret;
11416
11417         btrfs_init_path(&path);
11418         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11419         key.offset = 0;
11420         key.type = BTRFS_ROOT_ITEM_KEY;
11421
11422         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11423         if (ret) {
11424                 error("cannot find extent treet in tree_root");
11425                 goto out;
11426         }
11427
11428         while (1) {
11429                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11430                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11431                         goto next;
11432                 key.offset = (u64)-1;
11433
11434                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11435                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11436                                         &key);
11437                 else
11438                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11439                 if (IS_ERR(cur_root) || !cur_root) {
11440                         error("failed to read tree: %lld", key.objectid);
11441                         goto next;
11442                 }
11443
11444                 ret = traverse_tree_block(cur_root, cur_root->node);
11445                 err |= ret;
11446
11447                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11448                         btrfs_free_fs_root(cur_root);
11449 next:
11450                 ret = btrfs_next_item(root1, &path);
11451                 if (ret)
11452                         goto out;
11453         }
11454
11455 out:
11456         btrfs_release_path(&path);
11457         return err;
11458 }
11459
11460 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11461                            struct btrfs_root *root, int overwrite)
11462 {
11463         struct extent_buffer *c;
11464         struct extent_buffer *old = root->node;
11465         int level;
11466         int ret;
11467         struct btrfs_disk_key disk_key = {0,0,0};
11468
11469         level = 0;
11470
11471         if (overwrite) {
11472                 c = old;
11473                 extent_buffer_get(c);
11474                 goto init;
11475         }
11476         c = btrfs_alloc_free_block(trans, root,
11477                                    root->nodesize,
11478                                    root->root_key.objectid,
11479                                    &disk_key, level, 0, 0);
11480         if (IS_ERR(c)) {
11481                 c = old;
11482                 extent_buffer_get(c);
11483                 overwrite = 1;
11484         }
11485 init:
11486         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11487         btrfs_set_header_level(c, level);
11488         btrfs_set_header_bytenr(c, c->start);
11489         btrfs_set_header_generation(c, trans->transid);
11490         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11491         btrfs_set_header_owner(c, root->root_key.objectid);
11492
11493         write_extent_buffer(c, root->fs_info->fsid,
11494                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11495
11496         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11497                             btrfs_header_chunk_tree_uuid(c),
11498                             BTRFS_UUID_SIZE);
11499
11500         btrfs_mark_buffer_dirty(c);
11501         /*
11502          * this case can happen in the following case:
11503          *
11504          * 1.overwrite previous root.
11505          *
11506          * 2.reinit reloc data root, this is because we skip pin
11507          * down reloc data tree before which means we can allocate
11508          * same block bytenr here.
11509          */
11510         if (old->start == c->start) {
11511                 btrfs_set_root_generation(&root->root_item,
11512                                           trans->transid);
11513                 root->root_item.level = btrfs_header_level(root->node);
11514                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11515                                         &root->root_key, &root->root_item);
11516                 if (ret) {
11517                         free_extent_buffer(c);
11518                         return ret;
11519                 }
11520         }
11521         free_extent_buffer(old);
11522         root->node = c;
11523         add_root_to_dirty_list(root);
11524         return 0;
11525 }
11526
11527 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11528                                 struct extent_buffer *eb, int tree_root)
11529 {
11530         struct extent_buffer *tmp;
11531         struct btrfs_root_item *ri;
11532         struct btrfs_key key;
11533         u64 bytenr;
11534         u32 nodesize;
11535         int level = btrfs_header_level(eb);
11536         int nritems;
11537         int ret;
11538         int i;
11539
11540         /*
11541          * If we have pinned this block before, don't pin it again.
11542          * This can not only avoid forever loop with broken filesystem
11543          * but also give us some speedups.
11544          */
11545         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11546                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11547                 return 0;
11548
11549         btrfs_pin_extent(fs_info, eb->start, eb->len);
11550
11551         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11552         nritems = btrfs_header_nritems(eb);
11553         for (i = 0; i < nritems; i++) {
11554                 if (level == 0) {
11555                         btrfs_item_key_to_cpu(eb, &key, i);
11556                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11557                                 continue;
11558                         /* Skip the extent root and reloc roots */
11559                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11560                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11561                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11562                                 continue;
11563                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11564                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11565
11566                         /*
11567                          * If at any point we start needing the real root we
11568                          * will have to build a stump root for the root we are
11569                          * in, but for now this doesn't actually use the root so
11570                          * just pass in extent_root.
11571                          */
11572                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11573                                               nodesize, 0);
11574                         if (!extent_buffer_uptodate(tmp)) {
11575                                 fprintf(stderr, "Error reading root block\n");
11576                                 return -EIO;
11577                         }
11578                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11579                         free_extent_buffer(tmp);
11580                         if (ret)
11581                                 return ret;
11582                 } else {
11583                         bytenr = btrfs_node_blockptr(eb, i);
11584
11585                         /* If we aren't the tree root don't read the block */
11586                         if (level == 1 && !tree_root) {
11587                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11588                                 continue;
11589                         }
11590
11591                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11592                                               nodesize, 0);
11593                         if (!extent_buffer_uptodate(tmp)) {
11594                                 fprintf(stderr, "Error reading tree block\n");
11595                                 return -EIO;
11596                         }
11597                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11598                         free_extent_buffer(tmp);
11599                         if (ret)
11600                                 return ret;
11601                 }
11602         }
11603
11604         return 0;
11605 }
11606
11607 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11608 {
11609         int ret;
11610
11611         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11612         if (ret)
11613                 return ret;
11614
11615         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11616 }
11617
11618 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11619 {
11620         struct btrfs_block_group_cache *cache;
11621         struct btrfs_path path;
11622         struct extent_buffer *leaf;
11623         struct btrfs_chunk *chunk;
11624         struct btrfs_key key;
11625         int ret;
11626         u64 start;
11627
11628         btrfs_init_path(&path);
11629         key.objectid = 0;
11630         key.type = BTRFS_CHUNK_ITEM_KEY;
11631         key.offset = 0;
11632         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11633         if (ret < 0) {
11634                 btrfs_release_path(&path);
11635                 return ret;
11636         }
11637
11638         /*
11639          * We do this in case the block groups were screwed up and had alloc
11640          * bits that aren't actually set on the chunks.  This happens with
11641          * restored images every time and could happen in real life I guess.
11642          */
11643         fs_info->avail_data_alloc_bits = 0;
11644         fs_info->avail_metadata_alloc_bits = 0;
11645         fs_info->avail_system_alloc_bits = 0;
11646
11647         /* First we need to create the in-memory block groups */
11648         while (1) {
11649                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11650                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11651                         if (ret < 0) {
11652                                 btrfs_release_path(&path);
11653                                 return ret;
11654                         }
11655                         if (ret) {
11656                                 ret = 0;
11657                                 break;
11658                         }
11659                 }
11660                 leaf = path.nodes[0];
11661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11662                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11663                         path.slots[0]++;
11664                         continue;
11665                 }
11666
11667                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11668                 btrfs_add_block_group(fs_info, 0,
11669                                       btrfs_chunk_type(leaf, chunk),
11670                                       key.objectid, key.offset,
11671                                       btrfs_chunk_length(leaf, chunk));
11672                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11673                                  key.offset + btrfs_chunk_length(leaf, chunk));
11674                 path.slots[0]++;
11675         }
11676         start = 0;
11677         while (1) {
11678                 cache = btrfs_lookup_first_block_group(fs_info, start);
11679                 if (!cache)
11680                         break;
11681                 cache->cached = 1;
11682                 start = cache->key.objectid + cache->key.offset;
11683         }
11684
11685         btrfs_release_path(&path);
11686         return 0;
11687 }
11688
11689 static int reset_balance(struct btrfs_trans_handle *trans,
11690                          struct btrfs_fs_info *fs_info)
11691 {
11692         struct btrfs_root *root = fs_info->tree_root;
11693         struct btrfs_path path;
11694         struct extent_buffer *leaf;
11695         struct btrfs_key key;
11696         int del_slot, del_nr = 0;
11697         int ret;
11698         int found = 0;
11699
11700         btrfs_init_path(&path);
11701         key.objectid = BTRFS_BALANCE_OBJECTID;
11702         key.type = BTRFS_BALANCE_ITEM_KEY;
11703         key.offset = 0;
11704         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11705         if (ret) {
11706                 if (ret > 0)
11707                         ret = 0;
11708                 if (!ret)
11709                         goto reinit_data_reloc;
11710                 else
11711                         goto out;
11712         }
11713
11714         ret = btrfs_del_item(trans, root, &path);
11715         if (ret)
11716                 goto out;
11717         btrfs_release_path(&path);
11718
11719         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11720         key.type = BTRFS_ROOT_ITEM_KEY;
11721         key.offset = 0;
11722         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11723         if (ret < 0)
11724                 goto out;
11725         while (1) {
11726                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11727                         if (!found)
11728                                 break;
11729
11730                         if (del_nr) {
11731                                 ret = btrfs_del_items(trans, root, &path,
11732                                                       del_slot, del_nr);
11733                                 del_nr = 0;
11734                                 if (ret)
11735                                         goto out;
11736                         }
11737                         key.offset++;
11738                         btrfs_release_path(&path);
11739
11740                         found = 0;
11741                         ret = btrfs_search_slot(trans, root, &key, &path,
11742                                                 -1, 1);
11743                         if (ret < 0)
11744                                 goto out;
11745                         continue;
11746                 }
11747                 found = 1;
11748                 leaf = path.nodes[0];
11749                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11750                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11751                         break;
11752                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11753                         path.slots[0]++;
11754                         continue;
11755                 }
11756                 if (!del_nr) {
11757                         del_slot = path.slots[0];
11758                         del_nr = 1;
11759                 } else {
11760                         del_nr++;
11761                 }
11762                 path.slots[0]++;
11763         }
11764
11765         if (del_nr) {
11766                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11767                 if (ret)
11768                         goto out;
11769         }
11770         btrfs_release_path(&path);
11771
11772 reinit_data_reloc:
11773         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11774         key.type = BTRFS_ROOT_ITEM_KEY;
11775         key.offset = (u64)-1;
11776         root = btrfs_read_fs_root(fs_info, &key);
11777         if (IS_ERR(root)) {
11778                 fprintf(stderr, "Error reading data reloc tree\n");
11779                 ret = PTR_ERR(root);
11780                 goto out;
11781         }
11782         record_root_in_trans(trans, root);
11783         ret = btrfs_fsck_reinit_root(trans, root, 0);
11784         if (ret)
11785                 goto out;
11786         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11787 out:
11788         btrfs_release_path(&path);
11789         return ret;
11790 }
11791
11792 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11793                               struct btrfs_fs_info *fs_info)
11794 {
11795         u64 start = 0;
11796         int ret;
11797
11798         /*
11799          * The only reason we don't do this is because right now we're just
11800          * walking the trees we find and pinning down their bytes, we don't look
11801          * at any of the leaves.  In order to do mixed groups we'd have to check
11802          * the leaves of any fs roots and pin down the bytes for any file
11803          * extents we find.  Not hard but why do it if we don't have to?
11804          */
11805         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11806                 fprintf(stderr, "We don't support re-initing the extent tree "
11807                         "for mixed block groups yet, please notify a btrfs "
11808                         "developer you want to do this so they can add this "
11809                         "functionality.\n");
11810                 return -EINVAL;
11811         }
11812
11813         /*
11814          * first we need to walk all of the trees except the extent tree and pin
11815          * down the bytes that are in use so we don't overwrite any existing
11816          * metadata.
11817          */
11818         ret = pin_metadata_blocks(fs_info);
11819         if (ret) {
11820                 fprintf(stderr, "error pinning down used bytes\n");
11821                 return ret;
11822         }
11823
11824         /*
11825          * Need to drop all the block groups since we're going to recreate all
11826          * of them again.
11827          */
11828         btrfs_free_block_groups(fs_info);
11829         ret = reset_block_groups(fs_info);
11830         if (ret) {
11831                 fprintf(stderr, "error resetting the block groups\n");
11832                 return ret;
11833         }
11834
11835         /* Ok we can allocate now, reinit the extent root */
11836         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11837         if (ret) {
11838                 fprintf(stderr, "extent root initialization failed\n");
11839                 /*
11840                  * When the transaction code is updated we should end the
11841                  * transaction, but for now progs only knows about commit so
11842                  * just return an error.
11843                  */
11844                 return ret;
11845         }
11846
11847         /*
11848          * Now we have all the in-memory block groups setup so we can make
11849          * allocations properly, and the metadata we care about is safe since we
11850          * pinned all of it above.
11851          */
11852         while (1) {
11853                 struct btrfs_block_group_cache *cache;
11854
11855                 cache = btrfs_lookup_first_block_group(fs_info, start);
11856                 if (!cache)
11857                         break;
11858                 start = cache->key.objectid + cache->key.offset;
11859                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11860                                         &cache->key, &cache->item,
11861                                         sizeof(cache->item));
11862                 if (ret) {
11863                         fprintf(stderr, "Error adding block group\n");
11864                         return ret;
11865                 }
11866                 btrfs_extent_post_op(trans, fs_info->extent_root);
11867         }
11868
11869         ret = reset_balance(trans, fs_info);
11870         if (ret)
11871                 fprintf(stderr, "error resetting the pending balance\n");
11872
11873         return ret;
11874 }
11875
11876 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11877 {
11878         struct btrfs_path path;
11879         struct btrfs_trans_handle *trans;
11880         struct btrfs_key key;
11881         int ret;
11882
11883         printf("Recowing metadata block %llu\n", eb->start);
11884         key.objectid = btrfs_header_owner(eb);
11885         key.type = BTRFS_ROOT_ITEM_KEY;
11886         key.offset = (u64)-1;
11887
11888         root = btrfs_read_fs_root(root->fs_info, &key);
11889         if (IS_ERR(root)) {
11890                 fprintf(stderr, "Couldn't find owner root %llu\n",
11891                         key.objectid);
11892                 return PTR_ERR(root);
11893         }
11894
11895         trans = btrfs_start_transaction(root, 1);
11896         if (IS_ERR(trans))
11897                 return PTR_ERR(trans);
11898
11899         btrfs_init_path(&path);
11900         path.lowest_level = btrfs_header_level(eb);
11901         if (path.lowest_level)
11902                 btrfs_node_key_to_cpu(eb, &key, 0);
11903         else
11904                 btrfs_item_key_to_cpu(eb, &key, 0);
11905
11906         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11907         btrfs_commit_transaction(trans, root);
11908         btrfs_release_path(&path);
11909         return ret;
11910 }
11911
11912 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11913 {
11914         struct btrfs_path path;
11915         struct btrfs_trans_handle *trans;
11916         struct btrfs_key key;
11917         int ret;
11918
11919         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11920                bad->key.type, bad->key.offset);
11921         key.objectid = bad->root_id;
11922         key.type = BTRFS_ROOT_ITEM_KEY;
11923         key.offset = (u64)-1;
11924
11925         root = btrfs_read_fs_root(root->fs_info, &key);
11926         if (IS_ERR(root)) {
11927                 fprintf(stderr, "Couldn't find owner root %llu\n",
11928                         key.objectid);
11929                 return PTR_ERR(root);
11930         }
11931
11932         trans = btrfs_start_transaction(root, 1);
11933         if (IS_ERR(trans))
11934                 return PTR_ERR(trans);
11935
11936         btrfs_init_path(&path);
11937         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11938         if (ret) {
11939                 if (ret > 0)
11940                         ret = 0;
11941                 goto out;
11942         }
11943         ret = btrfs_del_item(trans, root, &path);
11944 out:
11945         btrfs_commit_transaction(trans, root);
11946         btrfs_release_path(&path);
11947         return ret;
11948 }
11949
11950 static int zero_log_tree(struct btrfs_root *root)
11951 {
11952         struct btrfs_trans_handle *trans;
11953         int ret;
11954
11955         trans = btrfs_start_transaction(root, 1);
11956         if (IS_ERR(trans)) {
11957                 ret = PTR_ERR(trans);
11958                 return ret;
11959         }
11960         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11961         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11962         ret = btrfs_commit_transaction(trans, root);
11963         return ret;
11964 }
11965
11966 static int populate_csum(struct btrfs_trans_handle *trans,
11967                          struct btrfs_root *csum_root, char *buf, u64 start,
11968                          u64 len)
11969 {
11970         u64 offset = 0;
11971         u64 sectorsize;
11972         int ret = 0;
11973
11974         while (offset < len) {
11975                 sectorsize = csum_root->sectorsize;
11976                 ret = read_extent_data(csum_root, buf, start + offset,
11977                                        &sectorsize, 0);
11978                 if (ret)
11979                         break;
11980                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11981                                             start + offset, buf, sectorsize);
11982                 if (ret)
11983                         break;
11984                 offset += sectorsize;
11985         }
11986         return ret;
11987 }
11988
11989 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11990                                       struct btrfs_root *csum_root,
11991                                       struct btrfs_root *cur_root)
11992 {
11993         struct btrfs_path path;
11994         struct btrfs_key key;
11995         struct extent_buffer *node;
11996         struct btrfs_file_extent_item *fi;
11997         char *buf = NULL;
11998         u64 start = 0;
11999         u64 len = 0;
12000         int slot = 0;
12001         int ret = 0;
12002
12003         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12004         if (!buf)
12005                 return -ENOMEM;
12006
12007         btrfs_init_path(&path);
12008         key.objectid = 0;
12009         key.offset = 0;
12010         key.type = 0;
12011         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12012         if (ret < 0)
12013                 goto out;
12014         /* Iterate all regular file extents and fill its csum */
12015         while (1) {
12016                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12017
12018                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12019                         goto next;
12020                 node = path.nodes[0];
12021                 slot = path.slots[0];
12022                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12023                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12024                         goto next;
12025                 start = btrfs_file_extent_disk_bytenr(node, fi);
12026                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12027
12028                 ret = populate_csum(trans, csum_root, buf, start, len);
12029                 if (ret == -EEXIST)
12030                         ret = 0;
12031                 if (ret < 0)
12032                         goto out;
12033 next:
12034                 /*
12035                  * TODO: if next leaf is corrupted, jump to nearest next valid
12036                  * leaf.
12037                  */
12038                 ret = btrfs_next_item(cur_root, &path);
12039                 if (ret < 0)
12040                         goto out;
12041                 if (ret > 0) {
12042                         ret = 0;
12043                         goto out;
12044                 }
12045         }
12046
12047 out:
12048         btrfs_release_path(&path);
12049         free(buf);
12050         return ret;
12051 }
12052
12053 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12054                                   struct btrfs_root *csum_root)
12055 {
12056         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12057         struct btrfs_path path;
12058         struct btrfs_root *tree_root = fs_info->tree_root;
12059         struct btrfs_root *cur_root;
12060         struct extent_buffer *node;
12061         struct btrfs_key key;
12062         int slot = 0;
12063         int ret = 0;
12064
12065         btrfs_init_path(&path);
12066         key.objectid = BTRFS_FS_TREE_OBJECTID;
12067         key.offset = 0;
12068         key.type = BTRFS_ROOT_ITEM_KEY;
12069         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12070         if (ret < 0)
12071                 goto out;
12072         if (ret > 0) {
12073                 ret = -ENOENT;
12074                 goto out;
12075         }
12076
12077         while (1) {
12078                 node = path.nodes[0];
12079                 slot = path.slots[0];
12080                 btrfs_item_key_to_cpu(node, &key, slot);
12081                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12082                         goto out;
12083                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12084                         goto next;
12085                 if (!is_fstree(key.objectid))
12086                         goto next;
12087                 key.offset = (u64)-1;
12088
12089                 cur_root = btrfs_read_fs_root(fs_info, &key);
12090                 if (IS_ERR(cur_root) || !cur_root) {
12091                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12092                                 key.objectid);
12093                         goto out;
12094                 }
12095                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12096                                 cur_root);
12097                 if (ret < 0)
12098                         goto out;
12099 next:
12100                 ret = btrfs_next_item(tree_root, &path);
12101                 if (ret > 0) {
12102                         ret = 0;
12103                         goto out;
12104                 }
12105                 if (ret < 0)
12106                         goto out;
12107         }
12108
12109 out:
12110         btrfs_release_path(&path);
12111         return ret;
12112 }
12113
12114 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12115                                       struct btrfs_root *csum_root)
12116 {
12117         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12118         struct btrfs_path path;
12119         struct btrfs_extent_item *ei;
12120         struct extent_buffer *leaf;
12121         char *buf;
12122         struct btrfs_key key;
12123         int ret;
12124
12125         btrfs_init_path(&path);
12126         key.objectid = 0;
12127         key.type = BTRFS_EXTENT_ITEM_KEY;
12128         key.offset = 0;
12129         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12130         if (ret < 0) {
12131                 btrfs_release_path(&path);
12132                 return ret;
12133         }
12134
12135         buf = malloc(csum_root->sectorsize);
12136         if (!buf) {
12137                 btrfs_release_path(&path);
12138                 return -ENOMEM;
12139         }
12140
12141         while (1) {
12142                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12143                         ret = btrfs_next_leaf(extent_root, &path);
12144                         if (ret < 0)
12145                                 break;
12146                         if (ret) {
12147                                 ret = 0;
12148                                 break;
12149                         }
12150                 }
12151                 leaf = path.nodes[0];
12152
12153                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12154                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12155                         path.slots[0]++;
12156                         continue;
12157                 }
12158
12159                 ei = btrfs_item_ptr(leaf, path.slots[0],
12160                                     struct btrfs_extent_item);
12161                 if (!(btrfs_extent_flags(leaf, ei) &
12162                       BTRFS_EXTENT_FLAG_DATA)) {
12163                         path.slots[0]++;
12164                         continue;
12165                 }
12166
12167                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12168                                     key.offset);
12169                 if (ret)
12170                         break;
12171                 path.slots[0]++;
12172         }
12173
12174         btrfs_release_path(&path);
12175         free(buf);
12176         return ret;
12177 }
12178
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source.  When search_fs_tree is non-zero
 * the fs/subvolume trees are walked instead; otherwise the extent tree is
 * used.
 *
 * Returns whatever the selected walker returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
12195
12196 static void free_roots_info_cache(void)
12197 {
12198         if (!roots_info_cache)
12199                 return;
12200
12201         while (!cache_tree_empty(roots_info_cache)) {
12202                 struct cache_extent *entry;
12203                 struct root_item_info *rii;
12204
12205                 entry = first_cache_extent(roots_info_cache);
12206                 if (!entry)
12207                         break;
12208                 remove_cache_extent(roots_info_cache, entry);
12209                 rii = container_of(entry, struct root_item_info, cache_extent);
12210                 free(rii);
12211         }
12212
12213         free(roots_info_cache);
12214         roots_info_cache = NULL;
12215 }
12216
12217 static int build_roots_info_cache(struct btrfs_fs_info *info)
12218 {
12219         int ret = 0;
12220         struct btrfs_key key;
12221         struct extent_buffer *leaf;
12222         struct btrfs_path path;
12223
12224         if (!roots_info_cache) {
12225                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12226                 if (!roots_info_cache)
12227                         return -ENOMEM;
12228                 cache_tree_init(roots_info_cache);
12229         }
12230
12231         btrfs_init_path(&path);
12232         key.objectid = 0;
12233         key.type = BTRFS_EXTENT_ITEM_KEY;
12234         key.offset = 0;
12235         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12236         if (ret < 0)
12237                 goto out;
12238         leaf = path.nodes[0];
12239
12240         while (1) {
12241                 struct btrfs_key found_key;
12242                 struct btrfs_extent_item *ei;
12243                 struct btrfs_extent_inline_ref *iref;
12244                 int slot = path.slots[0];
12245                 int type;
12246                 u64 flags;
12247                 u64 root_id;
12248                 u8 level;
12249                 struct cache_extent *entry;
12250                 struct root_item_info *rii;
12251
12252                 if (slot >= btrfs_header_nritems(leaf)) {
12253                         ret = btrfs_next_leaf(info->extent_root, &path);
12254                         if (ret < 0) {
12255                                 break;
12256                         } else if (ret) {
12257                                 ret = 0;
12258                                 break;
12259                         }
12260                         leaf = path.nodes[0];
12261                         slot = path.slots[0];
12262                 }
12263
12264                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12265
12266                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12267                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12268                         goto next;
12269
12270                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12271                 flags = btrfs_extent_flags(leaf, ei);
12272
12273                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12274                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12275                         goto next;
12276
12277                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12278                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12279                         level = found_key.offset;
12280                 } else {
12281                         struct btrfs_tree_block_info *binfo;
12282
12283                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12284                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12285                         level = btrfs_tree_block_level(leaf, binfo);
12286                 }
12287
12288                 /*
12289                  * For a root extent, it must be of the following type and the
12290                  * first (and only one) iref in the item.
12291                  */
12292                 type = btrfs_extent_inline_ref_type(leaf, iref);
12293                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12294                         goto next;
12295
12296                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12297                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12298                 if (!entry) {
12299                         rii = malloc(sizeof(struct root_item_info));
12300                         if (!rii) {
12301                                 ret = -ENOMEM;
12302                                 goto out;
12303                         }
12304                         rii->cache_extent.start = root_id;
12305                         rii->cache_extent.size = 1;
12306                         rii->level = (u8)-1;
12307                         entry = &rii->cache_extent;
12308                         ret = insert_cache_extent(roots_info_cache, entry);
12309                         ASSERT(ret == 0);
12310                 } else {
12311                         rii = container_of(entry, struct root_item_info,
12312                                            cache_extent);
12313                 }
12314
12315                 ASSERT(rii->cache_extent.start == root_id);
12316                 ASSERT(rii->cache_extent.size == 1);
12317
12318                 if (level > rii->level || rii->level == (u8)-1) {
12319                         rii->level = level;
12320                         rii->bytenr = found_key.objectid;
12321                         rii->gen = btrfs_extent_generation(leaf, ei);
12322                         rii->node_count = 1;
12323                 } else if (level == rii->level) {
12324                         rii->node_count++;
12325                 }
12326 next:
12327                 path.slots[0]++;
12328         }
12329
12330 out:
12331         btrfs_release_path(&path);
12332
12333         return ret;
12334 }
12335
12336 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12337                                   struct btrfs_path *path,
12338                                   const struct btrfs_key *root_key,
12339                                   const int read_only_mode)
12340 {
12341         const u64 root_id = root_key->objectid;
12342         struct cache_extent *entry;
12343         struct root_item_info *rii;
12344         struct btrfs_root_item ri;
12345         unsigned long offset;
12346
12347         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12348         if (!entry) {
12349                 fprintf(stderr,
12350                         "Error: could not find extent items for root %llu\n",
12351                         root_key->objectid);
12352                 return -ENOENT;
12353         }
12354
12355         rii = container_of(entry, struct root_item_info, cache_extent);
12356         ASSERT(rii->cache_extent.start == root_id);
12357         ASSERT(rii->cache_extent.size == 1);
12358
12359         if (rii->node_count != 1) {
12360                 fprintf(stderr,
12361                         "Error: could not find btree root extent for root %llu\n",
12362                         root_id);
12363                 return -ENOENT;
12364         }
12365
12366         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12367         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12368
12369         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12370             btrfs_root_level(&ri) != rii->level ||
12371             btrfs_root_generation(&ri) != rii->gen) {
12372
12373                 /*
12374                  * If we're in repair mode but our caller told us to not update
12375                  * the root item, i.e. just check if it needs to be updated, don't
12376                  * print this message, since the caller will call us again shortly
12377                  * for the same root item without read only mode (the caller will
12378                  * open a transaction first).
12379                  */
12380                 if (!(read_only_mode && repair))
12381                         fprintf(stderr,
12382                                 "%sroot item for root %llu,"
12383                                 " current bytenr %llu, current gen %llu, current level %u,"
12384                                 " new bytenr %llu, new gen %llu, new level %u\n",
12385                                 (read_only_mode ? "" : "fixing "),
12386                                 root_id,
12387                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12388                                 btrfs_root_level(&ri),
12389                                 rii->bytenr, rii->gen, rii->level);
12390
12391                 if (btrfs_root_generation(&ri) > rii->gen) {
12392                         fprintf(stderr,
12393                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12394                                 root_id, btrfs_root_generation(&ri), rii->gen);
12395                         return -EINVAL;
12396                 }
12397
12398                 if (!read_only_mode) {
12399                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12400                         btrfs_set_root_level(&ri, rii->level);
12401                         btrfs_set_root_generation(&ri, rii->gen);
12402                         write_extent_buffer(path->nodes[0], &ri,
12403                                             offset, sizeof(ri));
12404                 }
12405
12406                 return 1;
12407         }
12408
12409         return 0;
12410 }
12411
12412 /*
12413  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12414  * caused read-only snapshots to be corrupted if they were created at a moment
12415  * when the source subvolume/snapshot had orphan items. The issue was that the
12416  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12417  * node instead of the post orphan cleanup root node.
12418  * So this function, and its callees, just detects and fixes those cases. Even
12419  * though the regression was for read-only snapshots, this function applies to
12420  * any snapshot/subvolume root.
12421  * This must be run before any other repair code - not doing it so, makes other
12422  * repair code delete or modify backrefs in the extent tree for example, which
12423  * will result in an inconsistent fs after repairing the root items.
12424  */
12425 static int repair_root_items(struct btrfs_fs_info *info)
12426 {
12427         struct btrfs_path path;
12428         struct btrfs_key key;
12429         struct extent_buffer *leaf;
12430         struct btrfs_trans_handle *trans = NULL;
12431         int ret = 0;
12432         int bad_roots = 0;
12433         int need_trans = 0;
12434
12435         btrfs_init_path(&path);
12436
12437         ret = build_roots_info_cache(info);
12438         if (ret)
12439                 goto out;
12440
12441         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12442         key.type = BTRFS_ROOT_ITEM_KEY;
12443         key.offset = 0;
12444
12445 again:
12446         /*
12447          * Avoid opening and committing transactions if a leaf doesn't have
12448          * any root items that need to be fixed, so that we avoid rotating
12449          * backup roots unnecessarily.
12450          */
12451         if (need_trans) {
12452                 trans = btrfs_start_transaction(info->tree_root, 1);
12453                 if (IS_ERR(trans)) {
12454                         ret = PTR_ERR(trans);
12455                         goto out;
12456                 }
12457         }
12458
12459         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12460                                 0, trans ? 1 : 0);
12461         if (ret < 0)
12462                 goto out;
12463         leaf = path.nodes[0];
12464
12465         while (1) {
12466                 struct btrfs_key found_key;
12467
12468                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12469                         int no_more_keys = find_next_key(&path, &key);
12470
12471                         btrfs_release_path(&path);
12472                         if (trans) {
12473                                 ret = btrfs_commit_transaction(trans,
12474                                                                info->tree_root);
12475                                 trans = NULL;
12476                                 if (ret < 0)
12477                                         goto out;
12478                         }
12479                         need_trans = 0;
12480                         if (no_more_keys)
12481                                 break;
12482                         goto again;
12483                 }
12484
12485                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12486
12487                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12488                         goto next;
12489                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12490                         goto next;
12491
12492                 ret = maybe_repair_root_item(info, &path, &found_key,
12493                                              trans ? 0 : 1);
12494                 if (ret < 0)
12495                         goto out;
12496                 if (ret) {
12497                         if (!trans && repair) {
12498                                 need_trans = 1;
12499                                 key = found_key;
12500                                 btrfs_release_path(&path);
12501                                 goto again;
12502                         }
12503                         bad_roots++;
12504                 }
12505 next:
12506                 path.slots[0]++;
12507         }
12508         ret = 0;
12509 out:
12510         free_roots_info_cache();
12511         btrfs_release_path(&path);
12512         if (trans)
12513                 btrfs_commit_transaction(trans, info->tree_root);
12514         if (ret < 0)
12515                 return ret;
12516
12517         return bad_roots;
12518 }
12519
12520 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12521 {
12522         struct btrfs_trans_handle *trans;
12523         struct btrfs_block_group_cache *bg_cache;
12524         u64 current = 0;
12525         int ret = 0;
12526
12527         /* Clear all free space cache inodes and its extent data */
12528         while (1) {
12529                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12530                 if (!bg_cache)
12531                         break;
12532                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12533                 if (ret < 0)
12534                         return ret;
12535                 current = bg_cache->key.objectid + bg_cache->key.offset;
12536         }
12537
12538         /* Don't forget to set cache_generation to -1 */
12539         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12540         if (IS_ERR(trans)) {
12541                 error("failed to update super block cache generation");
12542                 return PTR_ERR(trans);
12543         }
12544         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12545         btrfs_commit_transaction(trans, fs_info->tree_root);
12546
12547         return ret;
12548 }
12549
/*
 * Help text for "btrfs check": a NULL-terminated array of strings that is
 * passed to usage().  The leading entries give the synopsis and description;
 * after the empty string come the per-option lines, one string per output
 * line.  Each option listed here should have a matching entry in the
 * long_options table of cmd_check().
 */
12550 const char * const cmd_check_usage[] = {
12551         "btrfs check [options] <device>",
12552         "Check structural integrity of a filesystem (unmounted).",
12553         "Check structural integrity of an unmounted filesystem. Verify internal",
12554         "trees' consistency and item connectivity. In the repair mode try to",
12555         "fix the problems found. ",
12556         "WARNING: the repair mode is considered dangerous",
12557         "",
12558         "-s|--super <superblock>     use this superblock copy",
12559         "-b|--backup                 use the first valid backup root copy",
12560         "--repair                    try to repair the filesystem",
12561         "--readonly                  run in read-only mode (default)",
12562         "--init-csum-tree            create a new CRC tree",
12563         "--init-extent-tree          create a new extent tree",
12564         "--mode <MODE>               allows choice of memory/IO trade-offs",
12565         "                            where MODE is one of:",
12566         "                            original - read inodes and extents to memory (requires",
12567         "                                       more memory, does less IO)",
12568         "                            lowmem   - try to use less memory but read blocks again",
12569         "                                       when needed",
12570         "--check-data-csum           verify checksums of data blocks",
12571         "-Q|--qgroup-report          print a report on qgroup consistency",
12572         "-E|--subvol-extents <subvolid>",
12573         "                            print subvolume extents and sharing state",
12574         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
12575         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
12576         "-p|--progress               indicate progress",
12577         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
12578         NULL
12579 };
12580
12581 int cmd_check(int argc, char **argv)
12582 {
12583         struct cache_tree root_cache;
12584         struct btrfs_root *root;
12585         struct btrfs_fs_info *info;
12586         u64 bytenr = 0;
12587         u64 subvolid = 0;
12588         u64 tree_root_bytenr = 0;
12589         u64 chunk_root_bytenr = 0;
12590         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12591         int ret;
12592         int err = 0;
12593         u64 num;
12594         int init_csum_tree = 0;
12595         int readonly = 0;
12596         int clear_space_cache = 0;
12597         int qgroup_report = 0;
12598         int qgroups_repaired = 0;
12599         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12600
12601         while(1) {
12602                 int c;
12603                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12604                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12605                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12606                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12607                 static const struct option long_options[] = {
12608                         { "super", required_argument, NULL, 's' },
12609                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12610                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12611                         { "init-csum-tree", no_argument, NULL,
12612                                 GETOPT_VAL_INIT_CSUM },
12613                         { "init-extent-tree", no_argument, NULL,
12614                                 GETOPT_VAL_INIT_EXTENT },
12615                         { "check-data-csum", no_argument, NULL,
12616                                 GETOPT_VAL_CHECK_CSUM },
12617                         { "backup", no_argument, NULL, 'b' },
12618                         { "subvol-extents", required_argument, NULL, 'E' },
12619                         { "qgroup-report", no_argument, NULL, 'Q' },
12620                         { "tree-root", required_argument, NULL, 'r' },
12621                         { "chunk-root", required_argument, NULL,
12622                                 GETOPT_VAL_CHUNK_TREE },
12623                         { "progress", no_argument, NULL, 'p' },
12624                         { "mode", required_argument, NULL,
12625                                 GETOPT_VAL_MODE },
12626                         { "clear-space-cache", required_argument, NULL,
12627                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12628                         { NULL, 0, NULL, 0}
12629                 };
12630
12631                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12632                 if (c < 0)
12633                         break;
12634                 switch(c) {
12635                         case 'a': /* ignored */ break;
12636                         case 'b':
12637                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12638                                 break;
12639                         case 's':
12640                                 num = arg_strtou64(optarg);
12641                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12642                                         error(
12643                                         "super mirror should be less than %d",
12644                                                 BTRFS_SUPER_MIRROR_MAX);
12645                                         exit(1);
12646                                 }
12647                                 bytenr = btrfs_sb_offset(((int)num));
12648                                 printf("using SB copy %llu, bytenr %llu\n", num,
12649                                        (unsigned long long)bytenr);
12650                                 break;
12651                         case 'Q':
12652                                 qgroup_report = 1;
12653                                 break;
12654                         case 'E':
12655                                 subvolid = arg_strtou64(optarg);
12656                                 break;
12657                         case 'r':
12658                                 tree_root_bytenr = arg_strtou64(optarg);
12659                                 break;
12660                         case GETOPT_VAL_CHUNK_TREE:
12661                                 chunk_root_bytenr = arg_strtou64(optarg);
12662                                 break;
12663                         case 'p':
12664                                 ctx.progress_enabled = true;
12665                                 break;
12666                         case '?':
12667                         case 'h':
12668                                 usage(cmd_check_usage);
12669                         case GETOPT_VAL_REPAIR:
12670                                 printf("enabling repair mode\n");
12671                                 repair = 1;
12672                                 ctree_flags |= OPEN_CTREE_WRITES;
12673                                 break;
12674                         case GETOPT_VAL_READONLY:
12675                                 readonly = 1;
12676                                 break;
12677                         case GETOPT_VAL_INIT_CSUM:
12678                                 printf("Creating a new CRC tree\n");
12679                                 init_csum_tree = 1;
12680                                 repair = 1;
12681                                 ctree_flags |= OPEN_CTREE_WRITES;
12682                                 break;
12683                         case GETOPT_VAL_INIT_EXTENT:
12684                                 init_extent_tree = 1;
12685                                 ctree_flags |= (OPEN_CTREE_WRITES |
12686                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12687                                 repair = 1;
12688                                 break;
12689                         case GETOPT_VAL_CHECK_CSUM:
12690                                 check_data_csum = 1;
12691                                 break;
12692                         case GETOPT_VAL_MODE:
12693                                 check_mode = parse_check_mode(optarg);
12694                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12695                                         error("unknown mode: %s", optarg);
12696                                         exit(1);
12697                                 }
12698                                 break;
12699                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12700                                 if (strcmp(optarg, "v1") == 0) {
12701                                         clear_space_cache = 1;
12702                                 } else if (strcmp(optarg, "v2") == 0) {
12703                                         clear_space_cache = 2;
12704                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12705                                 } else {
12706                                         error(
12707                 "invalid argument to --clear-space-cache, must be v1 or v2");
12708                                         exit(1);
12709                                 }
12710                                 ctree_flags |= OPEN_CTREE_WRITES;
12711                                 break;
12712                 }
12713         }
12714
12715         if (check_argc_exact(argc - optind, 1))
12716                 usage(cmd_check_usage);
12717
12718         if (ctx.progress_enabled) {
12719                 ctx.tp = TASK_NOTHING;
12720                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12721         }
12722
12723         /* This check is the only reason for --readonly to exist */
12724         if (readonly && repair) {
12725                 error("repair options are not compatible with --readonly");
12726                 exit(1);
12727         }
12728
12729         /*
12730          * Not supported yet
12731          */
12732         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12733                 error("low memory mode doesn't support repair yet");
12734                 exit(1);
12735         }
12736
12737         radix_tree_init();
12738         cache_tree_init(&root_cache);
12739
12740         if((ret = check_mounted(argv[optind])) < 0) {
12741                 error("could not check mount status: %s", strerror(-ret));
12742                 err |= !!ret;
12743                 goto err_out;
12744         } else if(ret) {
12745                 error("%s is currently mounted, aborting", argv[optind]);
12746                 ret = -EBUSY;
12747                 err |= !!ret;
12748                 goto err_out;
12749         }
12750
12751         /* only allow partial opening under repair mode */
12752         if (repair)
12753                 ctree_flags |= OPEN_CTREE_PARTIAL;
12754
12755         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12756                                   chunk_root_bytenr, ctree_flags);
12757         if (!info) {
12758                 error("cannot open file system");
12759                 ret = -EIO;
12760                 err |= !!ret;
12761                 goto err_out;
12762         }
12763
12764         global_info = info;
12765         root = info->fs_root;
12766         if (clear_space_cache == 1) {
12767                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12768                         error(
12769                 "free space cache v2 detected, use --clear-space-cache v2");
12770                         ret = 1;
12771                         goto close_out;
12772                 }
12773                 printf("Clearing free space cache\n");
12774                 ret = clear_free_space_cache(info);
12775                 if (ret) {
12776                         error("failed to clear free space cache");
12777                         ret = 1;
12778                 } else {
12779                         printf("Free space cache cleared\n");
12780                 }
12781                 goto close_out;
12782         } else if (clear_space_cache == 2) {
12783                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12784                         printf("no free space cache v2 to clear\n");
12785                         ret = 0;
12786                         goto close_out;
12787                 }
12788                 printf("Clear free space cache v2\n");
12789                 ret = btrfs_clear_free_space_tree(info);
12790                 if (ret) {
12791                         error("failed to clear free space cache v2: %d", ret);
12792                         ret = 1;
12793                 } else {
12794                         printf("free space cache v2 cleared\n");
12795                 }
12796                 goto close_out;
12797         }
12798
12799         /*
12800          * repair mode will force us to commit transaction which
12801          * will make us fail to load log tree when mounting.
12802          */
12803         if (repair && btrfs_super_log_root(info->super_copy)) {
12804                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12805                 if (!ret) {
12806                         ret = 1;
12807                         err |= !!ret;
12808                         goto close_out;
12809                 }
12810                 ret = zero_log_tree(root);
12811                 err |= !!ret;
12812                 if (ret) {
12813                         error("failed to zero log tree: %d", ret);
12814                         goto close_out;
12815                 }
12816         }
12817
12818         uuid_unparse(info->super_copy->fsid, uuidbuf);
12819         if (qgroup_report) {
12820                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12821                        uuidbuf);
12822                 ret = qgroup_verify_all(info);
12823                 err |= !!ret;
12824                 if (ret == 0)
12825                         report_qgroups(1);
12826                 goto close_out;
12827         }
12828         if (subvolid) {
12829                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12830                        subvolid, argv[optind], uuidbuf);
12831                 ret = print_extent_state(info, subvolid);
12832                 err |= !!ret;
12833                 goto close_out;
12834         }
12835         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12836
12837         if (!extent_buffer_uptodate(info->tree_root->node) ||
12838             !extent_buffer_uptodate(info->dev_root->node) ||
12839             !extent_buffer_uptodate(info->chunk_root->node)) {
12840                 error("critical roots corrupted, unable to check the filesystem");
12841                 err |= !!ret;
12842                 ret = -EIO;
12843                 goto close_out;
12844         }
12845
12846         if (init_extent_tree || init_csum_tree) {
12847                 struct btrfs_trans_handle *trans;
12848
12849                 trans = btrfs_start_transaction(info->extent_root, 0);
12850                 if (IS_ERR(trans)) {
12851                         error("error starting transaction");
12852                         ret = PTR_ERR(trans);
12853                         err |= !!ret;
12854                         goto close_out;
12855                 }
12856
12857                 if (init_extent_tree) {
12858                         printf("Creating a new extent tree\n");
12859                         ret = reinit_extent_tree(trans, info);
12860                         err |= !!ret;
12861                         if (ret)
12862                                 goto close_out;
12863                 }
12864
12865                 if (init_csum_tree) {
12866                         printf("Reinitialize checksum tree\n");
12867                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12868                         if (ret) {
12869                                 error("checksum tree initialization failed: %d",
12870                                                 ret);
12871                                 ret = -EIO;
12872                                 err |= !!ret;
12873                                 goto close_out;
12874                         }
12875
12876                         ret = fill_csum_tree(trans, info->csum_root,
12877                                              init_extent_tree);
12878                         err |= !!ret;
12879                         if (ret) {
12880                                 error("checksum tree refilling failed: %d", ret);
12881                                 return -EIO;
12882                         }
12883                 }
12884                 /*
12885                  * Ok now we commit and run the normal fsck, which will add
12886                  * extent entries for all of the items it finds.
12887                  */
12888                 ret = btrfs_commit_transaction(trans, info->extent_root);
12889                 err |= !!ret;
12890                 if (ret)
12891                         goto close_out;
12892         }
12893         if (!extent_buffer_uptodate(info->extent_root->node)) {
12894                 error("critical: extent_root, unable to check the filesystem");
12895                 ret = -EIO;
12896                 err |= !!ret;
12897                 goto close_out;
12898         }
12899         if (!extent_buffer_uptodate(info->csum_root->node)) {
12900                 error("critical: csum_root, unable to check the filesystem");
12901                 ret = -EIO;
12902                 err |= !!ret;
12903                 goto close_out;
12904         }
12905
12906         if (!ctx.progress_enabled)
12907                 fprintf(stderr, "checking extents\n");
12908         if (check_mode == CHECK_MODE_LOWMEM)
12909                 ret = check_chunks_and_extents_v2(root);
12910         else
12911                 ret = check_chunks_and_extents(root);
12912         err |= !!ret;
12913         if (ret)
12914                 error(
12915                 "errors found in extent allocation tree or chunk allocation");
12916
12917         ret = repair_root_items(info);
12918         err |= !!ret;
12919         if (ret < 0)
12920                 goto close_out;
12921         if (repair) {
12922                 fprintf(stderr, "Fixed %d roots.\n", ret);
12923                 ret = 0;
12924         } else if (ret > 0) {
12925                 fprintf(stderr,
12926                        "Found %d roots with an outdated root item.\n",
12927                        ret);
12928                 fprintf(stderr,
12929                         "Please run a filesystem check with the option --repair to fix them.\n");
12930                 ret = 1;
12931                 err |= !!ret;
12932                 goto close_out;
12933         }
12934
12935         if (!ctx.progress_enabled) {
12936                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12937                         fprintf(stderr, "checking free space tree\n");
12938                 else
12939                         fprintf(stderr, "checking free space cache\n");
12940         }
12941         ret = check_space_cache(root);
12942         err |= !!ret;
12943         if (ret)
12944                 goto out;
12945
12946         /*
12947          * We used to have to have these hole extents in between our real
12948          * extents so if we don't have this flag set we need to make sure there
12949          * are no gaps in the file extents for inodes, otherwise we can just
12950          * ignore it when this happens.
12951          */
12952         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12953         if (!ctx.progress_enabled)
12954                 fprintf(stderr, "checking fs roots\n");
12955         if (check_mode == CHECK_MODE_LOWMEM)
12956                 ret = check_fs_roots_v2(root->fs_info);
12957         else
12958                 ret = check_fs_roots(root, &root_cache);
12959         err |= !!ret;
12960         if (ret)
12961                 goto out;
12962
12963         fprintf(stderr, "checking csums\n");
12964         ret = check_csums(root);
12965         err |= !!ret;
12966         if (ret)
12967                 goto out;
12968
12969         fprintf(stderr, "checking root refs\n");
12970         /* For low memory mode, check_fs_roots_v2 handles root refs */
12971         if (check_mode != CHECK_MODE_LOWMEM) {
12972                 ret = check_root_refs(root, &root_cache);
12973                 err |= !!ret;
12974                 if (ret)
12975                         goto out;
12976         }
12977
12978         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12979                 struct extent_buffer *eb;
12980
12981                 eb = list_first_entry(&root->fs_info->recow_ebs,
12982                                       struct extent_buffer, recow);
12983                 list_del_init(&eb->recow);
12984                 ret = recow_extent_buffer(root, eb);
12985                 err |= !!ret;
12986                 if (ret)
12987                         break;
12988         }
12989
12990         while (!list_empty(&delete_items)) {
12991                 struct bad_item *bad;
12992
12993                 bad = list_first_entry(&delete_items, struct bad_item, list);
12994                 list_del_init(&bad->list);
12995                 if (repair) {
12996                         ret = delete_bad_item(root, bad);
12997                         err |= !!ret;
12998                 }
12999                 free(bad);
13000         }
13001
13002         if (info->quota_enabled) {
13003                 fprintf(stderr, "checking quota groups\n");
13004                 ret = qgroup_verify_all(info);
13005                 err |= !!ret;
13006                 if (ret)
13007                         goto out;
13008                 report_qgroups(0);
13009                 ret = repair_qgroups(info, &qgroups_repaired);
13010                 err |= !!ret;
13011                 if (err)
13012                         goto out;
13013                 ret = 0;
13014         }
13015
13016         if (!list_empty(&root->fs_info->recow_ebs)) {
13017                 error("transid errors in file system");
13018                 ret = 1;
13019                 err |= !!ret;
13020         }
13021 out:
13022         if (found_old_backref) { /*
13023                  * there was a disk format change when mixed
13024                  * backref was in testing tree. The old format
13025                  * existed about one week.
13026                  */
13027                 printf("\n * Found old mixed backref format. "
13028                        "The old format is not supported! *"
13029                        "\n * Please mount the FS in readonly mode, "
13030                        "backup data and re-format the FS. *\n\n");
13031                 err |= 1;
13032         }
13033         printf("found %llu bytes used err is %d\n",
13034                (unsigned long long)bytes_used, ret);
13035         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13036         printf("total tree bytes: %llu\n",
13037                (unsigned long long)total_btree_bytes);
13038         printf("total fs tree bytes: %llu\n",
13039                (unsigned long long)total_fs_tree_bytes);
13040         printf("total extent tree bytes: %llu\n",
13041                (unsigned long long)total_extent_tree_bytes);
13042         printf("btree space waste bytes: %llu\n",
13043                (unsigned long long)btree_space_waste);
13044         printf("file data blocks allocated: %llu\n referenced %llu\n",
13045                 (unsigned long long)data_bytes_allocated,
13046                 (unsigned long long)data_bytes_referenced);
13047
13048         free_qgroup_counts();
13049         free_root_recs_tree(&root_cache);
13050 close_out:
13051         close_ctree(root);
13052 err_out:
13053         if (ctx.progress_enabled)
13054                 task_deinit(ctx.info);
13055
13056         return err;
13057 }