btrfs-progs: Refactor btrfs_num_copies to use btrfs_fs_info
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase reported by the progress task.  The value is used by
 * print_status_check() as an index into its label table, which has no
 * entry for TASK_NOTHING.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* must be the last element */
};
53
/* Context handed (as a void pointer) to print_status_check(). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output was requested - confirm at the use sites */
        int progress_enabled;
        /* phase whose label print_status_check() prints */
        enum task_position tp;

        /* handle passed to task_period_start()/task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state of the checker.
 *
 * NOTE(review): the u64 counters look like running totals and the int
 * variables like option flags, but the code that updates them is not
 * visible in this part of the file - confirm at the use sites.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* progress-task context; see struct task_ctx */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementation to run.  parse_check_mode() maps "lowmem" to
 * CHECK_MODE_LOWMEM, "orig" and "original" to CHECK_MODE_ORIGINAL, and
 * any other string to CHECK_MODE_UNKNOWN.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Selected mode; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common part of a back reference.  It is embedded as the "node" member
 * of struct data_backref and struct tree_backref and linked through
 * "list".
 */
struct extent_backref {
        struct list_head list;
        /* NOTE(review): presumably tells data_backref from tree_backref - confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/* Back reference record for data; wraps the common struct extent_backref. */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share storage.
         * NOTE(review): presumably node.full_backref decides which one is
         * meaningful - confirm at the use sites.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
116
/*
 * Error bits, one bit per condition.  Bit 0 is not used by this set.
 * NOTE(review): the code that sets and tests these bits is not visible in
 * this part of the file.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, but without the undetermined members and
 * linked through a list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
/* Back reference record for a tree block; wraps the common struct extent_backref. */
struct tree_backref {
        struct extent_backref node;
        /*
         * parent and root share storage.
         * NOTE(review): presumably node.full_backref decides which one is
         * meaningful - confirm at the use sites.
         */
        union {
                u64 parent;
                u64 root;
        };
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 is why that bit-field is two bits wide.
 */
enum { FLAG_UNSET = 2 };

/*
 * Per-extent bookkeeping record.
 * NOTE(review): field meanings below are taken from the names only; the
 * code that fills the record is not visible in this part of the file.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* link used by to_extent_record() */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* 0, 1 or FLAG_UNSET */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * Record describing one root item, linkable through "list".
 * NOTE(review): the code that fills and consumes this record is not
 * visible in this part of the file; field meanings follow the names.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
232
/*
 * Reference error bits; print_ref_error() turns each set bit into a
 * human-readable message.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * A hole covering the byte range [start, start + len).  Holes are kept in
 * an rb-tree ordered by start (see compare_hole()).
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
252
/*
 * State collected for one inode.
 *
 * Records are reference counted through "refs": get_inode_rec() clones a
 * record that has more than one reference before handing it out for
 * modification, and free_inode_rec() drops one reference per call.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        /* set to (u64)-1 when get_inode_rec() creates the record */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* reference count */
        u32 refs;
};
280
/*
 * Error bits stored in inode_record::errors; print_inode_error() turns
 * each set bit into a human-readable message.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/*
 * State collected for one root.
 * NOTE(review): the code that fills this record is not visible in this
 * part of the file.
 */
struct root_record {
        /* NOTE(review): presumably a list of struct root_backref - confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
324
/*
 * Cache entry carrying an opaque payload.  get_inode_rec() uses it to map
 * an inode number (cache.start = ino, cache.size = 1) to the inode's
 * struct inode_record stored in data.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
329
/*
 * Per-node state referenced from walk_control::nodes.
 * NOTE(review): the code that uses this structure is not visible in this
 * part of the file; field meanings follow the names.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
337
/*
 * A block described by a 64-bit start and a 32-bit size.
 * NOTE(review): users are not visible in this part of the file.
 */
struct block_info {
        u64 start;
        u32 size;
};
342
/*
 * Tree-walk state.
 * NOTE(review): the walking code is not visible in this part of the file.
 */
struct walk_control {
        struct cache_tree shared;
        /* one slot per tree level */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        /* NOTE(review): presumably an index into nodes[] - confirm */
        int active_node;
        int root_level;
};
349
/*
 * A tree item (key plus the id of the root holding it) that can be queued
 * on a list.
 * NOTE(review): the code that queues and consumes these entries is not
 * visible in this part of the file.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
355
/*
 * A (bytenr, bytes) extent candidate with counters, linkable on a list.
 * NOTE(review): the code that uses this structure is not visible in this
 * part of the file.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
363
/*
 * Information gathered about one tree root, stored in a cache tree
 * through cache_extent.
 * NOTE(review): the code that fills this structure is not visible in this
 * part of the file.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
373
/*
 * Error bits for the low memory mode check.
 *
 * No caller inspects individual bits yet; they are only used internally
 * for error classification.  Every class has its own bit:
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with
 * REFERENCER_MISMATCH, which made the two indistinguishable, so it now
 * uses the next free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8) /* Chunk type mismatch */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Ends the progress line by emitting a newline on stdout and flushing
 * it.  @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        printf("\n");
        fflush(stdout);
        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
/* Forward declaration; the definition is not part of this section of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize,
833                                          root->fs_info->sectorsize));
834         }
835 }
836
837 static void print_ref_error(int errors)
838 {
839         if (errors & REF_ERR_NO_DIR_ITEM)
840                 fprintf(stderr, ", no dir item");
841         if (errors & REF_ERR_NO_DIR_INDEX)
842                 fprintf(stderr, ", no dir index");
843         if (errors & REF_ERR_NO_INODE_REF)
844                 fprintf(stderr, ", no inode ref");
845         if (errors & REF_ERR_DUP_DIR_ITEM)
846                 fprintf(stderr, ", dup dir item");
847         if (errors & REF_ERR_DUP_DIR_INDEX)
848                 fprintf(stderr, ", dup dir index");
849         if (errors & REF_ERR_DUP_INODE_REF)
850                 fprintf(stderr, ", dup inode ref");
851         if (errors & REF_ERR_INDEX_UNMATCH)
852                 fprintf(stderr, ", index mismatch");
853         if (errors & REF_ERR_FILETYPE_UNMATCH)
854                 fprintf(stderr, ", filetype mismatch");
855         if (errors & REF_ERR_NAME_TOO_LONG)
856                 fprintf(stderr, ", name too long");
857         if (errors & REF_ERR_NO_ROOT_REF)
858                 fprintf(stderr, ", no root ref");
859         if (errors & REF_ERR_NO_ROOT_BACKREF)
860                 fprintf(stderr, ", no root backref");
861         if (errors & REF_ERR_DUP_ROOT_REF)
862                 fprintf(stderr, ", dup root ref");
863         if (errors & REF_ERR_DUP_ROOT_BACKREF)
864                 fprintf(stderr, ", dup root backref");
865         fprintf(stderr, "\n");
866 }
867
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869                                           u64 ino, int mod)
870 {
871         struct ptr_node *node;
872         struct cache_extent *cache;
873         struct inode_record *rec = NULL;
874         int ret;
875
876         cache = lookup_cache_extent(inode_cache, ino, 1);
877         if (cache) {
878                 node = container_of(cache, struct ptr_node, cache);
879                 rec = node->data;
880                 if (mod && rec->refs > 1) {
881                         node->data = clone_inode_rec(rec);
882                         if (IS_ERR(node->data))
883                                 return node->data;
884                         rec->refs--;
885                         rec = node->data;
886                 }
887         } else if (mod) {
888                 rec = calloc(1, sizeof(*rec));
889                 if (!rec)
890                         return ERR_PTR(-ENOMEM);
891                 rec->ino = ino;
892                 rec->extent_start = (u64)-1;
893                 rec->refs = 1;
894                 INIT_LIST_HEAD(&rec->backrefs);
895                 INIT_LIST_HEAD(&rec->orphan_extents);
896                 rec->holes = RB_ROOT;
897
898                 node = malloc(sizeof(*node));
899                 if (!node) {
900                         free(rec);
901                         return ERR_PTR(-ENOMEM);
902                 }
903                 node->cache.start = ino;
904                 node->cache.size = 1;
905                 node->data = rec;
906
907                 if (ino == BTRFS_FREE_INO_OBJECTID)
908                         rec->found_link = 1;
909
910                 ret = insert_cache_extent(inode_cache, &node->cache);
911                 if (ret)
912                         return ERR_PTR(-EEXIST);
913         }
914         return rec;
915 }
916
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 {
919         struct orphan_data_extent *orphan;
920
921         while (!list_empty(orphan_extents)) {
922                 orphan = list_entry(orphan_extents->next,
923                                     struct orphan_data_extent, list);
924                 list_del(&orphan->list);
925                 free(orphan);
926         }
927 }
928
929 static void free_inode_rec(struct inode_record *rec)
930 {
931         struct inode_backref *backref;
932
933         if (--rec->refs > 0)
934                 return;
935
936         while (!list_empty(&rec->backrefs)) {
937                 backref = to_inode_backref(rec->backrefs.next);
938                 list_del(&backref->list);
939                 free(backref);
940         }
941         free_orphan_data_extents(&rec->orphan_extents);
942         free_file_extent_holes(&rec->holes);
943         free(rec);
944 }
945
946 static int can_free_inode_rec(struct inode_record *rec)
947 {
948         if (!rec->errors && rec->checked && rec->found_inode_item &&
949             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
950                 return 1;
951         return 0;
952 }
953
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955                                  struct inode_record *rec)
956 {
957         struct cache_extent *cache;
958         struct inode_backref *tmp, *backref;
959         struct ptr_node *node;
960         u8 filetype;
961
962         if (!rec->found_inode_item)
963                 return;
964
965         filetype = imode_to_type(rec->imode);
966         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967                 if (backref->found_dir_item && backref->found_dir_index) {
968                         if (backref->filetype != filetype)
969                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970                         if (!backref->errors && backref->found_inode_ref &&
971                             rec->nlink == rec->found_link) {
972                                 list_del(&backref->list);
973                                 free(backref);
974                         }
975                 }
976         }
977
978         if (!rec->checked || rec->merging)
979                 return;
980
981         if (S_ISDIR(rec->imode)) {
982                 if (rec->found_size != rec->isize)
983                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984                 if (rec->found_file_extent)
985                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
986         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987                 if (rec->found_dir_item)
988                         rec->errors |= I_ERR_ODD_DIR_ITEM;
989                 if (rec->found_size != rec->nbytes)
990                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991                 if (rec->nlink > 0 && !no_holes &&
992                     (rec->extent_end < rec->isize ||
993                      first_extent_gap(&rec->holes) < rec->isize))
994                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995         }
996
997         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998                 if (rec->found_csum_item && rec->nodatasum)
999                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000                 if (rec->some_csum_missing && !rec->nodatasum)
1001                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002         }
1003
1004         BUG_ON(rec->refs != 1);
1005         if (can_free_inode_rec(rec)) {
1006                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007                 node = container_of(cache, struct ptr_node, cache);
1008                 BUG_ON(node->data != rec);
1009                 remove_cache_extent(inode_cache, &node->cache);
1010                 free(node);
1011                 free_inode_rec(rec);
1012         }
1013 }
1014
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 {
1017         struct btrfs_path path;
1018         struct btrfs_key key;
1019         int ret;
1020
1021         key.objectid = BTRFS_ORPHAN_OBJECTID;
1022         key.type = BTRFS_ORPHAN_ITEM_KEY;
1023         key.offset = ino;
1024
1025         btrfs_init_path(&path);
1026         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027         btrfs_release_path(&path);
1028         if (ret > 0)
1029                 ret = -ENOENT;
1030         return ret;
1031 }
1032
1033 static int process_inode_item(struct extent_buffer *eb,
1034                               int slot, struct btrfs_key *key,
1035                               struct shared_node *active_node)
1036 {
1037         struct inode_record *rec;
1038         struct btrfs_inode_item *item;
1039
1040         rec = active_node->current;
1041         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042         if (rec->found_inode_item) {
1043                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044                 return 1;
1045         }
1046         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047         rec->nlink = btrfs_inode_nlink(eb, item);
1048         rec->isize = btrfs_inode_size(eb, item);
1049         rec->nbytes = btrfs_inode_nbytes(eb, item);
1050         rec->imode = btrfs_inode_mode(eb, item);
1051         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052                 rec->nodatasum = 1;
1053         rec->found_inode_item = 1;
1054         if (rec->nlink == 0)
1055                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056         maybe_free_inode_rec(&active_node->inode_cache, rec);
1057         return 0;
1058 }
1059
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061                                                 const char *name,
1062                                                 int namelen, u64 dir)
1063 {
1064         struct inode_backref *backref;
1065
1066         list_for_each_entry(backref, &rec->backrefs, list) {
1067                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068                         break;
1069                 if (backref->dir != dir || backref->namelen != namelen)
1070                         continue;
1071                 if (memcmp(name, backref->name, namelen))
1072                         continue;
1073                 return backref;
1074         }
1075
1076         backref = malloc(sizeof(*backref) + namelen + 1);
1077         if (!backref)
1078                 return NULL;
1079         memset(backref, 0, sizeof(*backref));
1080         backref->dir = dir;
1081         backref->namelen = namelen;
1082         memcpy(backref->name, name, namelen);
1083         backref->name[namelen] = '\0';
1084         list_add_tail(&backref->list, &rec->backrefs);
1085         return backref;
1086 }
1087
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089                              u64 ino, u64 dir, u64 index,
1090                              const char *name, int namelen,
1091                              u8 filetype, u8 itemtype, int errors)
1092 {
1093         struct inode_record *rec;
1094         struct inode_backref *backref;
1095
1096         rec = get_inode_rec(inode_cache, ino, 1);
1097         BUG_ON(IS_ERR(rec));
1098         backref = get_inode_backref(rec, name, namelen, dir);
1099         BUG_ON(!backref);
1100         if (errors)
1101                 backref->errors |= errors;
1102         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103                 if (backref->found_dir_index)
1104                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105                 if (backref->found_inode_ref && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 if (backref->found_dir_item && backref->filetype != filetype)
1108                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109
1110                 backref->index = index;
1111                 backref->filetype = filetype;
1112                 backref->found_dir_index = 1;
1113         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114                 rec->found_link++;
1115                 if (backref->found_dir_item)
1116                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117                 if (backref->found_dir_index && backref->filetype != filetype)
1118                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119
1120                 backref->filetype = filetype;
1121                 backref->found_dir_item = 1;
1122         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124                 if (backref->found_inode_ref)
1125                         backref->errors |= REF_ERR_DUP_INODE_REF;
1126                 if (backref->found_dir_index && backref->index != index)
1127                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1128                 else
1129                         backref->index = index;
1130
1131                 backref->ref_type = itemtype;
1132                 backref->found_inode_ref = 1;
1133         } else {
1134                 BUG_ON(1);
1135         }
1136
1137         maybe_free_inode_rec(inode_cache, rec);
1138         return 0;
1139 }
1140
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142                             struct cache_tree *dst_cache)
1143 {
1144         struct inode_backref *backref;
1145         u32 dir_count = 0;
1146         int ret = 0;
1147
1148         dst->merging = 1;
1149         list_for_each_entry(backref, &src->backrefs, list) {
1150                 if (backref->found_dir_index) {
1151                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1152                                         backref->index, backref->name,
1153                                         backref->namelen, backref->filetype,
1154                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1155                 }
1156                 if (backref->found_dir_item) {
1157                         dir_count++;
1158                         add_inode_backref(dst_cache, dst->ino,
1159                                         backref->dir, 0, backref->name,
1160                                         backref->namelen, backref->filetype,
1161                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1162                 }
1163                 if (backref->found_inode_ref) {
1164                         add_inode_backref(dst_cache, dst->ino,
1165                                         backref->dir, backref->index,
1166                                         backref->name, backref->namelen, 0,
1167                                         backref->ref_type, backref->errors);
1168                 }
1169         }
1170
1171         if (src->found_dir_item)
1172                 dst->found_dir_item = 1;
1173         if (src->found_file_extent)
1174                 dst->found_file_extent = 1;
1175         if (src->found_csum_item)
1176                 dst->found_csum_item = 1;
1177         if (src->some_csum_missing)
1178                 dst->some_csum_missing = 1;
1179         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1181                 if (ret < 0)
1182                         return ret;
1183         }
1184
1185         BUG_ON(src->found_link < dir_count);
1186         dst->found_link += src->found_link - dir_count;
1187         dst->found_size += src->found_size;
1188         if (src->extent_start != (u64)-1) {
1189                 if (dst->extent_start == (u64)-1) {
1190                         dst->extent_start = src->extent_start;
1191                         dst->extent_end = src->extent_end;
1192                 } else {
1193                         if (dst->extent_end > src->extent_start)
1194                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195                         else if (dst->extent_end < src->extent_start) {
1196                                 ret = add_file_extent_hole(&dst->holes,
1197                                         dst->extent_end,
1198                                         src->extent_start - dst->extent_end);
1199                         }
1200                         if (dst->extent_end < src->extent_end)
1201                                 dst->extent_end = src->extent_end;
1202                 }
1203         }
1204
1205         dst->errors |= src->errors;
1206         if (src->found_inode_item) {
1207                 if (!dst->found_inode_item) {
1208                         dst->nlink = src->nlink;
1209                         dst->isize = src->isize;
1210                         dst->nbytes = src->nbytes;
1211                         dst->imode = src->imode;
1212                         dst->nodatasum = src->nodatasum;
1213                         dst->found_inode_item = 1;
1214                 } else {
1215                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1216                 }
1217         }
1218         dst->merging = 0;
1219
1220         return 0;
1221 }
1222
1223 static int splice_shared_node(struct shared_node *src_node,
1224                               struct shared_node *dst_node)
1225 {
1226         struct cache_extent *cache;
1227         struct ptr_node *node, *ins;
1228         struct cache_tree *src, *dst;
1229         struct inode_record *rec, *conflict;
1230         u64 current_ino = 0;
1231         int splice = 0;
1232         int ret;
1233
1234         if (--src_node->refs == 0)
1235                 splice = 1;
1236         if (src_node->current)
1237                 current_ino = src_node->current->ino;
1238
1239         src = &src_node->root_cache;
1240         dst = &dst_node->root_cache;
1241 again:
1242         cache = search_cache_extent(src, 0);
1243         while (cache) {
1244                 node = container_of(cache, struct ptr_node, cache);
1245                 rec = node->data;
1246                 cache = next_cache_extent(cache);
1247
1248                 if (splice) {
1249                         remove_cache_extent(src, &node->cache);
1250                         ins = node;
1251                 } else {
1252                         ins = malloc(sizeof(*ins));
1253                         BUG_ON(!ins);
1254                         ins->cache.start = node->cache.start;
1255                         ins->cache.size = node->cache.size;
1256                         ins->data = rec;
1257                         rec->refs++;
1258                 }
1259                 ret = insert_cache_extent(dst, &ins->cache);
1260                 if (ret == -EEXIST) {
1261                         conflict = get_inode_rec(dst, rec->ino, 1);
1262                         BUG_ON(IS_ERR(conflict));
1263                         merge_inode_recs(rec, conflict, dst);
1264                         if (rec->checked) {
1265                                 conflict->checked = 1;
1266                                 if (dst_node->current == conflict)
1267                                         dst_node->current = NULL;
1268                         }
1269                         maybe_free_inode_rec(dst, conflict);
1270                         free_inode_rec(rec);
1271                         free(ins);
1272                 } else {
1273                         BUG_ON(ret);
1274                 }
1275         }
1276
1277         if (src == &src_node->root_cache) {
1278                 src = &src_node->inode_cache;
1279                 dst = &dst_node->inode_cache;
1280                 goto again;
1281         }
1282
1283         if (current_ino > 0 && (!dst_node->current ||
1284             current_ino > dst_node->current->ino)) {
1285                 if (dst_node->current) {
1286                         dst_node->current->checked = 1;
1287                         maybe_free_inode_rec(dst, dst_node->current);
1288                 }
1289                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290                 BUG_ON(IS_ERR(dst_node->current));
1291         }
1292         return 0;
1293 }
1294
1295 static void free_inode_ptr(struct cache_extent *cache)
1296 {
1297         struct ptr_node *node;
1298         struct inode_record *rec;
1299
1300         node = container_of(cache, struct ptr_node, cache);
1301         rec = node->data;
1302         free_inode_rec(rec);
1303         free(node);
1304 }
1305
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309                                             u64 bytenr)
1310 {
1311         struct cache_extent *cache;
1312         struct shared_node *node;
1313
1314         cache = lookup_cache_extent(shared, bytenr, 1);
1315         if (cache) {
1316                 node = container_of(cache, struct shared_node, cache);
1317                 return node;
1318         }
1319         return NULL;
1320 }
1321
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 {
1324         int ret;
1325         struct shared_node *node;
1326
1327         node = calloc(1, sizeof(*node));
1328         if (!node)
1329                 return -ENOMEM;
1330         node->cache.start = bytenr;
1331         node->cache.size = 1;
1332         cache_tree_init(&node->root_cache);
1333         cache_tree_init(&node->inode_cache);
1334         node->refs = refs;
1335
1336         ret = insert_cache_extent(shared, &node->cache);
1337
1338         return ret;
1339 }
1340
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342                              struct walk_control *wc, int level)
1343 {
1344         struct shared_node *node;
1345         struct shared_node *dest;
1346         int ret;
1347
1348         if (level == wc->active_node)
1349                 return 0;
1350
1351         BUG_ON(wc->active_node <= level);
1352         node = find_shared_node(&wc->shared, bytenr);
1353         if (!node) {
1354                 ret = add_shared_node(&wc->shared, bytenr, refs);
1355                 BUG_ON(ret);
1356                 node = find_shared_node(&wc->shared, bytenr);
1357                 wc->nodes[level] = node;
1358                 wc->active_node = level;
1359                 return 0;
1360         }
1361
1362         if (wc->root_level == wc->active_node &&
1363             btrfs_root_refs(&root->root_item) == 0) {
1364                 if (--node->refs == 0) {
1365                         free_inode_recs_tree(&node->root_cache);
1366                         free_inode_recs_tree(&node->inode_cache);
1367                         remove_cache_extent(&wc->shared, &node->cache);
1368                         free(node);
1369                 }
1370                 return 1;
1371         }
1372
1373         dest = wc->nodes[wc->active_node];
1374         splice_shared_node(node, dest);
1375         if (node->refs == 0) {
1376                 remove_cache_extent(&wc->shared, &node->cache);
1377                 free(node);
1378         }
1379         return 1;
1380 }
1381
1382 static int leave_shared_node(struct btrfs_root *root,
1383                              struct walk_control *wc, int level)
1384 {
1385         struct shared_node *node;
1386         struct shared_node *dest;
1387         int i;
1388
1389         if (level == wc->root_level)
1390                 return 0;
1391
1392         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1393                 if (wc->nodes[i])
1394                         break;
1395         }
1396         BUG_ON(i >= BTRFS_MAX_LEVEL);
1397
1398         node = wc->nodes[wc->active_node];
1399         wc->nodes[wc->active_node] = NULL;
1400         wc->active_node = i;
1401
1402         dest = wc->nodes[wc->active_node];
1403         if (wc->active_node < wc->root_level ||
1404             btrfs_root_refs(&root->root_item) > 0) {
1405                 BUG_ON(node->refs <= 1);
1406                 splice_shared_node(node, dest);
1407         } else {
1408                 BUG_ON(node->refs < 2);
1409                 node->refs--;
1410         }
1411         return 0;
1412 }
1413
1414 /*
1415  * Returns:
1416  * < 0 - on error
1417  * 1   - if the root with id child_root_id is a child of root parent_root_id
1418  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1419  *       has other root(s) as parent(s)
1420  * 2   - if the root child_root_id doesn't have any parent roots
1421  */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423                          u64 child_root_id)
1424 {
1425         struct btrfs_path path;
1426         struct btrfs_key key;
1427         struct extent_buffer *leaf;
1428         int has_parent = 0;
1429         int ret;
1430
1431         btrfs_init_path(&path);
1432
1433         key.objectid = parent_root_id;
1434         key.type = BTRFS_ROOT_REF_KEY;
1435         key.offset = child_root_id;
1436         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1437                                 0, 0);
1438         if (ret < 0)
1439                 return ret;
1440         btrfs_release_path(&path);
1441         if (!ret)
1442                 return 1;
1443
1444         key.objectid = child_root_id;
1445         key.type = BTRFS_ROOT_BACKREF_KEY;
1446         key.offset = 0;
1447         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1448                                 0, 0);
1449         if (ret < 0)
1450                 goto out;
1451
1452         while (1) {
1453                 leaf = path.nodes[0];
1454                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456                         if (ret)
1457                                 break;
1458                         leaf = path.nodes[0];
1459                 }
1460
1461                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462                 if (key.objectid != child_root_id ||
1463                     key.type != BTRFS_ROOT_BACKREF_KEY)
1464                         break;
1465
1466                 has_parent = 1;
1467
1468                 if (key.offset == parent_root_id) {
1469                         btrfs_release_path(&path);
1470                         return 1;
1471                 }
1472
1473                 path.slots[0]++;
1474         }
1475 out:
1476         btrfs_release_path(&path);
1477         if (ret < 0)
1478                 return ret;
1479         return has_parent ? 0 : 2;
1480 }
1481
1482 static int process_dir_item(struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (cur + sizeof(*di) + name_len > total ||
1517                     name_len > BTRFS_NAME_LEN) {
1518                         error = REF_ERR_NAME_TOO_LONG;
1519
1520                         if (cur + sizeof(*di) > total)
1521                                 break;
1522                         len = min_t(u32, total - cur - sizeof(*di),
1523                                     BTRFS_NAME_LEN);
1524                 } else {
1525                         len = name_len;
1526                         error = 0;
1527                 }
1528
1529                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530
1531                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1532                         add_inode_backref(inode_cache, location.objectid,
1533                                           key->objectid, key->offset, namebuf,
1534                                           len, filetype, key->type, error);
1535                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1536                         add_inode_backref(root_cache, location.objectid,
1537                                           key->objectid, key->offset,
1538                                           namebuf, len, filetype,
1539                                           key->type, error);
1540                 } else {
1541                         fprintf(stderr, "invalid location in dir item %u\n",
1542                                 location.type);
1543                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1544                                           key->objectid, key->offset, namebuf,
1545                                           len, filetype, key->type, error);
1546                 }
1547
1548                 len = sizeof(*di) + name_len + data_len;
1549                 di = (struct btrfs_dir_item *)((char *)di + len);
1550                 cur += len;
1551         }
1552         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1553                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1554
1555         return 0;
1556 }
1557
1558 static int process_inode_ref(struct extent_buffer *eb,
1559                              int slot, struct btrfs_key *key,
1560                              struct shared_node *active_node)
1561 {
1562         u32 total;
1563         u32 cur = 0;
1564         u32 len;
1565         u32 name_len;
1566         u64 index;
1567         int error;
1568         struct cache_tree *inode_cache;
1569         struct btrfs_inode_ref *ref;
1570         char namebuf[BTRFS_NAME_LEN];
1571
1572         inode_cache = &active_node->inode_cache;
1573
1574         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1575         total = btrfs_item_size_nr(eb, slot);
1576         while (cur < total) {
1577                 name_len = btrfs_inode_ref_name_len(eb, ref);
1578                 index = btrfs_inode_ref_index(eb, ref);
1579
1580                 /* inode_ref + namelen should not cross item boundary */
1581                 if (cur + sizeof(*ref) + name_len > total ||
1582                     name_len > BTRFS_NAME_LEN) {
1583                         if (total < cur + sizeof(*ref))
1584                                 break;
1585
1586                         /* Still try to read out the remaining part */
1587                         len = min_t(u32, total - cur - sizeof(*ref),
1588                                     BTRFS_NAME_LEN);
1589                         error = REF_ERR_NAME_TOO_LONG;
1590                 } else {
1591                         len = name_len;
1592                         error = 0;
1593                 }
1594
1595                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1596                 add_inode_backref(inode_cache, key->objectid, key->offset,
1597                                   index, namebuf, len, 0, key->type, error);
1598
1599                 len = sizeof(*ref) + name_len;
1600                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1601                 cur += len;
1602         }
1603         return 0;
1604 }
1605
1606 static int process_inode_extref(struct extent_buffer *eb,
1607                                 int slot, struct btrfs_key *key,
1608                                 struct shared_node *active_node)
1609 {
1610         u32 total;
1611         u32 cur = 0;
1612         u32 len;
1613         u32 name_len;
1614         u64 index;
1615         u64 parent;
1616         int error;
1617         struct cache_tree *inode_cache;
1618         struct btrfs_inode_extref *extref;
1619         char namebuf[BTRFS_NAME_LEN];
1620
1621         inode_cache = &active_node->inode_cache;
1622
1623         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1624         total = btrfs_item_size_nr(eb, slot);
1625         while (cur < total) {
1626                 name_len = btrfs_inode_extref_name_len(eb, extref);
1627                 index = btrfs_inode_extref_index(eb, extref);
1628                 parent = btrfs_inode_extref_parent(eb, extref);
1629                 if (name_len <= BTRFS_NAME_LEN) {
1630                         len = name_len;
1631                         error = 0;
1632                 } else {
1633                         len = BTRFS_NAME_LEN;
1634                         error = REF_ERR_NAME_TOO_LONG;
1635                 }
1636                 read_extent_buffer(eb, namebuf,
1637                                    (unsigned long)(extref + 1), len);
1638                 add_inode_backref(inode_cache, key->objectid, parent,
1639                                   index, namebuf, len, 0, key->type, error);
1640
1641                 len = sizeof(*extref) + name_len;
1642                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1643                 cur += len;
1644         }
1645         return 0;
1646
1647 }
1648
1649 static int count_csum_range(struct btrfs_root *root, u64 start,
1650                             u64 len, u64 *found)
1651 {
1652         struct btrfs_key key;
1653         struct btrfs_path path;
1654         struct extent_buffer *leaf;
1655         int ret;
1656         size_t size;
1657         *found = 0;
1658         u64 csum_end;
1659         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1660
1661         btrfs_init_path(&path);
1662
1663         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1664         key.offset = start;
1665         key.type = BTRFS_EXTENT_CSUM_KEY;
1666
1667         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1668                                 &key, &path, 0, 0);
1669         if (ret < 0)
1670                 goto out;
1671         if (ret > 0 && path.slots[0] > 0) {
1672                 leaf = path.nodes[0];
1673                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1674                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1675                     key.type == BTRFS_EXTENT_CSUM_KEY)
1676                         path.slots[0]--;
1677         }
1678
1679         while (len > 0) {
1680                 leaf = path.nodes[0];
1681                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1682                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1683                         if (ret > 0)
1684                                 break;
1685                         else if (ret < 0)
1686                                 goto out;
1687                         leaf = path.nodes[0];
1688                 }
1689
1690                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1691                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1692                     key.type != BTRFS_EXTENT_CSUM_KEY)
1693                         break;
1694
1695                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1696                 if (key.offset >= start + len)
1697                         break;
1698
1699                 if (key.offset > start)
1700                         start = key.offset;
1701
1702                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1703                 csum_end = key.offset + (size / csum_size) *
1704                            root->fs_info->sectorsize;
1705                 if (csum_end > start) {
1706                         size = min(csum_end - start, len);
1707                         len -= size;
1708                         start += size;
1709                         *found += size;
1710                 }
1711
1712                 path.slots[0]++;
1713         }
1714 out:
1715         btrfs_release_path(&path);
1716         if (ret < 0)
1717                 return ret;
1718         return 0;
1719 }
1720
1721 static int process_file_extent(struct btrfs_root *root,
1722                                 struct extent_buffer *eb,
1723                                 int slot, struct btrfs_key *key,
1724                                 struct shared_node *active_node)
1725 {
1726         struct inode_record *rec;
1727         struct btrfs_file_extent_item *fi;
1728         u64 num_bytes = 0;
1729         u64 disk_bytenr = 0;
1730         u64 extent_offset = 0;
1731         u64 mask = root->fs_info->sectorsize - 1;
1732         int extent_type;
1733         int ret;
1734
1735         rec = active_node->current;
1736         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1737         rec->found_file_extent = 1;
1738
1739         if (rec->extent_start == (u64)-1) {
1740                 rec->extent_start = key->offset;
1741                 rec->extent_end = key->offset;
1742         }
1743
1744         if (rec->extent_end > key->offset)
1745                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1746         else if (rec->extent_end < key->offset) {
1747                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1748                                            key->offset - rec->extent_end);
1749                 if (ret < 0)
1750                         return ret;
1751         }
1752
1753         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1754         extent_type = btrfs_file_extent_type(eb, fi);
1755
1756         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1757                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1758                 if (num_bytes == 0)
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 rec->found_size += num_bytes;
1761                 num_bytes = (num_bytes + mask) & ~mask;
1762         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1763                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1764                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1765                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1766                 extent_offset = btrfs_file_extent_offset(eb, fi);
1767                 if (num_bytes == 0 || (num_bytes & mask))
1768                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1769                 if (num_bytes + extent_offset >
1770                     btrfs_file_extent_ram_bytes(eb, fi))
1771                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1772                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1773                     (btrfs_file_extent_compression(eb, fi) ||
1774                      btrfs_file_extent_encryption(eb, fi) ||
1775                      btrfs_file_extent_other_encoding(eb, fi)))
1776                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777                 if (disk_bytenr > 0)
1778                         rec->found_size += num_bytes;
1779         } else {
1780                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1781         }
1782         rec->extent_end = key->offset + num_bytes;
1783
1784         /*
1785          * The data reloc tree will copy full extents into its inode and then
1786          * copy the corresponding csums.  Because the extent it copied could be
1787          * a preallocated extent that hasn't been written to yet there may be no
1788          * csums to copy, ergo we won't have csums for our file extent.  This is
1789          * ok so just don't bother checking csums if the inode belongs to the
1790          * data reloc tree.
1791          */
1792         if (disk_bytenr > 0 &&
1793             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1794                 u64 found;
1795                 if (btrfs_file_extent_compression(eb, fi))
1796                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1797                 else
1798                         disk_bytenr += extent_offset;
1799
1800                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1801                 if (ret < 0)
1802                         return ret;
1803                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1804                         if (found > 0)
1805                                 rec->found_csum_item = 1;
1806                         if (found < num_bytes)
1807                                 rec->some_csum_missing = 1;
1808                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1809                         if (found > 0)
1810                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1811                 }
1812         }
1813         return 0;
1814 }
1815
1816 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1817                             struct walk_control *wc)
1818 {
1819         struct btrfs_key key;
1820         u32 nritems;
1821         int i;
1822         int ret = 0;
1823         struct cache_tree *inode_cache;
1824         struct shared_node *active_node;
1825
1826         if (wc->root_level == wc->active_node &&
1827             btrfs_root_refs(&root->root_item) == 0)
1828                 return 0;
1829
1830         active_node = wc->nodes[wc->active_node];
1831         inode_cache = &active_node->inode_cache;
1832         nritems = btrfs_header_nritems(eb);
1833         for (i = 0; i < nritems; i++) {
1834                 btrfs_item_key_to_cpu(eb, &key, i);
1835
1836                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1837                         continue;
1838                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1839                         continue;
1840
1841                 if (active_node->current == NULL ||
1842                     active_node->current->ino < key.objectid) {
1843                         if (active_node->current) {
1844                                 active_node->current->checked = 1;
1845                                 maybe_free_inode_rec(inode_cache,
1846                                                      active_node->current);
1847                         }
1848                         active_node->current = get_inode_rec(inode_cache,
1849                                                              key.objectid, 1);
1850                         BUG_ON(IS_ERR(active_node->current));
1851                 }
1852                 switch (key.type) {
1853                 case BTRFS_DIR_ITEM_KEY:
1854                 case BTRFS_DIR_INDEX_KEY:
1855                         ret = process_dir_item(eb, i, &key, active_node);
1856                         break;
1857                 case BTRFS_INODE_REF_KEY:
1858                         ret = process_inode_ref(eb, i, &key, active_node);
1859                         break;
1860                 case BTRFS_INODE_EXTREF_KEY:
1861                         ret = process_inode_extref(eb, i, &key, active_node);
1862                         break;
1863                 case BTRFS_INODE_ITEM_KEY:
1864                         ret = process_inode_item(eb, i, &key, active_node);
1865                         break;
1866                 case BTRFS_EXTENT_DATA_KEY:
1867                         ret = process_file_extent(root, eb, i, &key,
1868                                                   active_node);
1869                         break;
1870                 default:
1871                         break;
1872                 };
1873         }
1874         return ret;
1875 }
1876
/*
 * Per-level cache of the last tree block whose extent reference count was
 * looked up during a tree walk.  All arrays are indexed by tree level.
 */
struct node_refs {
        /* start bytenr of the tree block cached for this level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count looked up for that tree block */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero if that tree block has to be checked in the current root */
        int need_check[BTRFS_MAX_LEVEL];
};
1882
1883 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1884                              struct node_refs *nrefs, u64 level);
1885 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1886                             unsigned int ext_ref);
1887
1888 /*
1889  * Returns >0  Found error, not fatal, should continue
1890  * Returns <0  Fatal error, must exit the whole check
1891  * Returns 0   No errors found
1892  */
1893 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1894                                struct node_refs *nrefs, int *level, int ext_ref)
1895 {
1896         struct extent_buffer *cur = path->nodes[0];
1897         struct btrfs_key key;
1898         u64 cur_bytenr;
1899         u32 nritems;
1900         u64 first_ino = 0;
1901         int root_level = btrfs_header_level(root->node);
1902         int i;
1903         int ret = 0; /* Final return value */
1904         int err = 0; /* Positive error bitmap */
1905
1906         cur_bytenr = cur->start;
1907
1908         /* skip to first inode item or the first inode number change */
1909         nritems = btrfs_header_nritems(cur);
1910         for (i = 0; i < nritems; i++) {
1911                 btrfs_item_key_to_cpu(cur, &key, i);
1912                 if (i == 0)
1913                         first_ino = key.objectid;
1914                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1915                     (first_ino && first_ino != key.objectid))
1916                         break;
1917         }
1918         if (i == nritems) {
1919                 path->slots[0] = nritems;
1920                 return 0;
1921         }
1922         path->slots[0] = i;
1923
1924 again:
1925         err |= check_inode_item(root, path, ext_ref);
1926
1927         if (err & LAST_ITEM)
1928                 goto out;
1929
1930         /* still have inode items in thie leaf */
1931         if (cur->start == cur_bytenr)
1932                 goto again;
1933
1934         /*
1935          * we have switched to another leaf, above nodes may
1936          * have changed, here walk down the path, if a node
1937          * or leaf is shared, check whether we can skip this
1938          * node or leaf.
1939          */
1940         for (i = root_level; i >= 0; i--) {
1941                 if (path->nodes[i]->start == nrefs->bytenr[i])
1942                         continue;
1943
1944                 ret = update_nodes_refs(root,
1945                                 path->nodes[i]->start,
1946                                 nrefs, i);
1947                 if (ret)
1948                         goto out;
1949
1950                 if (!nrefs->need_check[i]) {
1951                         *level += 1;
1952                         break;
1953                 }
1954         }
1955
1956         for (i = 0; i < *level; i++) {
1957                 free_extent_buffer(path->nodes[i]);
1958                 path->nodes[i] = NULL;
1959         }
1960 out:
1961         err &= ~LAST_ITEM;
1962         if (err && !ret)
1963                 ret = err;
1964         return ret;
1965 }
1966
1967 static void reada_walk_down(struct btrfs_root *root,
1968                             struct extent_buffer *node, int slot)
1969 {
1970         u64 bytenr;
1971         u64 ptr_gen;
1972         u32 nritems;
1973         u32 blocksize;
1974         int i;
1975         int level;
1976
1977         level = btrfs_header_level(node);
1978         if (level != 1)
1979                 return;
1980
1981         nritems = btrfs_header_nritems(node);
1982         blocksize = root->fs_info->nodesize;
1983         for (i = slot; i < nritems; i++) {
1984                 bytenr = btrfs_node_blockptr(node, i);
1985                 ptr_gen = btrfs_node_ptr_generation(node, i);
1986                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1987         }
1988 }
1989
1990 /*
1991  * Check the child node/leaf by the following condition:
1992  * 1. the first item key of the node/leaf should be the same with the one
1993  *    in parent.
1994  * 2. block in parent node should match the child node/leaf.
1995  * 3. generation of parent node and child's header should be consistent.
1996  *
1997  * Or the child node/leaf pointed by the key in parent is not valid.
1998  *
1999  * We hope to check leaf owner too, but since subvol may share leaves,
2000  * which makes leaf owner check not so strong, key check should be
2001  * sufficient enough for that case.
2002  */
2003 static int check_child_node(struct extent_buffer *parent, int slot,
2004                             struct extent_buffer *child)
2005 {
2006         struct btrfs_key parent_key;
2007         struct btrfs_key child_key;
2008         int ret = 0;
2009
2010         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2011         if (btrfs_header_level(child) == 0)
2012                 btrfs_item_key_to_cpu(child, &child_key, 0);
2013         else
2014                 btrfs_node_key_to_cpu(child, &child_key, 0);
2015
2016         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2017                 ret = -EINVAL;
2018                 fprintf(stderr,
2019                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2020                         parent_key.objectid, parent_key.type, parent_key.offset,
2021                         child_key.objectid, child_key.type, child_key.offset);
2022         }
2023         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2024                 ret = -EINVAL;
2025                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2026                         btrfs_node_blockptr(parent, slot),
2027                         btrfs_header_bytenr(child));
2028         }
2029         if (btrfs_node_ptr_generation(parent, slot) !=
2030             btrfs_header_generation(child)) {
2031                 ret = -EINVAL;
2032                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2033                         btrfs_header_generation(child),
2034                         btrfs_node_ptr_generation(parent, slot));
2035         }
2036         return ret;
2037 }
2038
2039 /*
2040  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2041  * in every fs or file tree check. Here we find its all root ids, and only check
2042  * it in the fs or file tree which has the smallest root id.
2043  */
2044 static int need_check(struct btrfs_root *root, struct ulist *roots)
2045 {
2046         struct rb_node *node;
2047         struct ulist_node *u;
2048
2049         if (roots->nnodes == 1)
2050                 return 1;
2051
2052         node = rb_first(&roots->root);
2053         u = rb_entry(node, struct ulist_node, rb_node);
2054         /*
2055          * current root id is not smallest, we skip it and let it be checked
2056          * in the fs or file tree who hash the smallest root id.
2057          */
2058         if (root->objectid != u->val)
2059                 return 0;
2060
2061         return 1;
2062 }
2063
2064 /*
2065  * for a tree node or leaf, we record its reference count, so later if we still
2066  * process this node or leaf, don't need to compute its reference count again.
2067  */
2068 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2069                              struct node_refs *nrefs, u64 level)
2070 {
2071         int check, ret;
2072         u64 refs;
2073         struct ulist *roots;
2074
2075         if (nrefs->bytenr[level] != bytenr) {
2076                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2077                                        level, 1, &refs, NULL);
2078                 if (ret < 0)
2079                         return ret;
2080
2081                 nrefs->bytenr[level] = bytenr;
2082                 nrefs->refs[level] = refs;
2083                 if (refs > 1) {
2084                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2085                                                    0, &roots);
2086                         if (ret)
2087                                 return -EIO;
2088
2089                         check = need_check(root, roots);
2090                         ulist_free(roots);
2091                         nrefs->need_check[level] = check;
2092                 } else {
2093                         nrefs->need_check[level] = 1;
2094                 }
2095         }
2096
2097         return 0;
2098 }
2099
2100 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2101                           struct walk_control *wc, int *level,
2102                           struct node_refs *nrefs)
2103 {
2104         enum btrfs_tree_block_status status;
2105         u64 bytenr;
2106         u64 ptr_gen;
2107         struct extent_buffer *next;
2108         struct extent_buffer *cur;
2109         u32 blocksize;
2110         int ret, err = 0;
2111         u64 refs;
2112
2113         WARN_ON(*level < 0);
2114         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2115
2116         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2117                 refs = nrefs->refs[*level];
2118                 ret = 0;
2119         } else {
2120                 ret = btrfs_lookup_extent_info(NULL, root,
2121                                        path->nodes[*level]->start,
2122                                        *level, 1, &refs, NULL);
2123                 if (ret < 0) {
2124                         err = ret;
2125                         goto out;
2126                 }
2127                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2128                 nrefs->refs[*level] = refs;
2129         }
2130
2131         if (refs > 1) {
2132                 ret = enter_shared_node(root, path->nodes[*level]->start,
2133                                         refs, wc, *level);
2134                 if (ret > 0) {
2135                         err = ret;
2136                         goto out;
2137                 }
2138         }
2139
2140         while (*level >= 0) {
2141                 WARN_ON(*level < 0);
2142                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2143                 cur = path->nodes[*level];
2144
2145                 if (btrfs_header_level(cur) != *level)
2146                         WARN_ON(1);
2147
2148                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2149                         break;
2150                 if (*level == 0) {
2151                         ret = process_one_leaf(root, cur, wc);
2152                         if (ret < 0)
2153                                 err = ret;
2154                         break;
2155                 }
2156                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2157                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2158                 blocksize = root->fs_info->nodesize;
2159
2160                 if (bytenr == nrefs->bytenr[*level - 1]) {
2161                         refs = nrefs->refs[*level - 1];
2162                 } else {
2163                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2164                                         *level - 1, 1, &refs, NULL);
2165                         if (ret < 0) {
2166                                 refs = 0;
2167                         } else {
2168                                 nrefs->bytenr[*level - 1] = bytenr;
2169                                 nrefs->refs[*level - 1] = refs;
2170                         }
2171                 }
2172
2173                 if (refs > 1) {
2174                         ret = enter_shared_node(root, bytenr, refs,
2175                                                 wc, *level - 1);
2176                         if (ret > 0) {
2177                                 path->slots[*level]++;
2178                                 continue;
2179                         }
2180                 }
2181
2182                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2183                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2184                         free_extent_buffer(next);
2185                         reada_walk_down(root, cur, path->slots[*level]);
2186                         next = read_tree_block(root->fs_info, bytenr, blocksize,
2187                                                ptr_gen);
2188                         if (!extent_buffer_uptodate(next)) {
2189                                 struct btrfs_key node_key;
2190
2191                                 btrfs_node_key_to_cpu(path->nodes[*level],
2192                                                       &node_key,
2193                                                       path->slots[*level]);
2194                                 btrfs_add_corrupt_extent_record(root->fs_info,
2195                                                 &node_key,
2196                                                 path->nodes[*level]->start,
2197                                                 root->fs_info->nodesize,
2198                                                 *level);
2199                                 err = -EIO;
2200                                 goto out;
2201                         }
2202                 }
2203
2204                 ret = check_child_node(cur, path->slots[*level], next);
2205                 if (ret) {
2206                         free_extent_buffer(next);
2207                         err = ret;
2208                         goto out;
2209                 }
2210
2211                 if (btrfs_is_leaf(next))
2212                         status = btrfs_check_leaf(root, NULL, next);
2213                 else
2214                         status = btrfs_check_node(root, NULL, next);
2215                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2216                         free_extent_buffer(next);
2217                         err = -EIO;
2218                         goto out;
2219                 }
2220
2221                 *level = *level - 1;
2222                 free_extent_buffer(path->nodes[*level]);
2223                 path->nodes[*level] = next;
2224                 path->slots[*level] = 0;
2225         }
2226 out:
2227         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2228         return err;
2229 }
2230
2231 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2232                             unsigned int ext_ref);
2233
2234 /*
2235  * Returns >0  Found error, should continue
2236  * Returns <0  Fatal error, must exit the whole check
2237  * Returns 0   No errors found
2238  */
2239 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2240                              int *level, struct node_refs *nrefs, int ext_ref)
2241 {
2242         enum btrfs_tree_block_status status;
2243         u64 bytenr;
2244         u64 ptr_gen;
2245         struct extent_buffer *next;
2246         struct extent_buffer *cur;
2247         u32 blocksize;
2248         int ret;
2249
2250         WARN_ON(*level < 0);
2251         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2252
2253         ret = update_nodes_refs(root, path->nodes[*level]->start,
2254                                 nrefs, *level);
2255         if (ret < 0)
2256                 return ret;
2257
2258         while (*level >= 0) {
2259                 WARN_ON(*level < 0);
2260                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2261                 cur = path->nodes[*level];
2262
2263                 if (btrfs_header_level(cur) != *level)
2264                         WARN_ON(1);
2265
2266                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2267                         break;
2268                 /* Don't forgot to check leaf/node validation */
2269                 if (*level == 0) {
2270                         ret = btrfs_check_leaf(root, NULL, cur);
2271                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2272                                 ret = -EIO;
2273                                 break;
2274                         }
2275                         ret = process_one_leaf_v2(root, path, nrefs,
2276                                                   level, ext_ref);
2277                         break;
2278                 } else {
2279                         ret = btrfs_check_node(root, NULL, cur);
2280                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2281                                 ret = -EIO;
2282                                 break;
2283                         }
2284                 }
2285                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2286                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2287                 blocksize = root->fs_info->nodesize;
2288
2289                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2290                 if (ret)
2291                         break;
2292                 if (!nrefs->need_check[*level - 1]) {
2293                         path->slots[*level]++;
2294                         continue;
2295                 }
2296
2297                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2298                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2299                         free_extent_buffer(next);
2300                         reada_walk_down(root, cur, path->slots[*level]);
2301                         next = read_tree_block(root->fs_info, bytenr, blocksize,
2302                                                ptr_gen);
2303                         if (!extent_buffer_uptodate(next)) {
2304                                 struct btrfs_key node_key;
2305
2306                                 btrfs_node_key_to_cpu(path->nodes[*level],
2307                                                       &node_key,
2308                                                       path->slots[*level]);
2309                                 btrfs_add_corrupt_extent_record(root->fs_info,
2310                                                 &node_key,
2311                                                 path->nodes[*level]->start,
2312                                                 root->fs_info->nodesize,
2313                                                 *level);
2314                                 ret = -EIO;
2315                                 break;
2316                         }
2317                 }
2318
2319                 ret = check_child_node(cur, path->slots[*level], next);
2320                 if (ret < 0) 
2321                         break;
2322
2323                 if (btrfs_is_leaf(next))
2324                         status = btrfs_check_leaf(root, NULL, next);
2325                 else
2326                         status = btrfs_check_node(root, NULL, next);
2327                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2328                         free_extent_buffer(next);
2329                         ret = -EIO;
2330                         break;
2331                 }
2332
2333                 *level = *level - 1;
2334                 free_extent_buffer(path->nodes[*level]);
2335                 path->nodes[*level] = next;
2336                 path->slots[*level] = 0;
2337         }
2338         return ret;
2339 }
2340
2341 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2342                         struct walk_control *wc, int *level)
2343 {
2344         int i;
2345         struct extent_buffer *leaf;
2346
2347         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2348                 leaf = path->nodes[i];
2349                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350                         path->slots[i]++;
2351                         *level = i;
2352                         return 0;
2353                 } else {
2354                         free_extent_buffer(path->nodes[*level]);
2355                         path->nodes[*level] = NULL;
2356                         BUG_ON(*level > wc->active_node);
2357                         if (*level == wc->active_node)
2358                                 leave_shared_node(root, wc, *level);
2359                         *level = i + 1;
2360                 }
2361         }
2362         return 1;
2363 }
2364
2365 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2366                            int *level)
2367 {
2368         int i;
2369         struct extent_buffer *leaf;
2370
2371         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2372                 leaf = path->nodes[i];
2373                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2374                         path->slots[i]++;
2375                         *level = i;
2376                         return 0;
2377                 } else {
2378                         free_extent_buffer(path->nodes[*level]);
2379                         path->nodes[*level] = NULL;
2380                         *level = i + 1;
2381                 }
2382         }
2383         return 1;
2384 }
2385
2386 static int check_root_dir(struct inode_record *rec)
2387 {
2388         struct inode_backref *backref;
2389         int ret = -1;
2390
2391         if (!rec->found_inode_item || rec->errors)
2392                 goto out;
2393         if (rec->nlink != 1 || rec->found_link != 0)
2394                 goto out;
2395         if (list_empty(&rec->backrefs))
2396                 goto out;
2397         backref = to_inode_backref(rec->backrefs.next);
2398         if (!backref->found_inode_ref)
2399                 goto out;
2400         if (backref->index != 0 || backref->namelen != 2 ||
2401             memcmp(backref->name, "..", 2))
2402                 goto out;
2403         if (backref->found_dir_index || backref->found_dir_item)
2404                 goto out;
2405         ret = 0;
2406 out:
2407         return ret;
2408 }
2409
2410 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2411                               struct btrfs_root *root, struct btrfs_path *path,
2412                               struct inode_record *rec)
2413 {
2414         struct btrfs_inode_item *ei;
2415         struct btrfs_key key;
2416         int ret;
2417
2418         key.objectid = rec->ino;
2419         key.type = BTRFS_INODE_ITEM_KEY;
2420         key.offset = (u64)-1;
2421
2422         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2423         if (ret < 0)
2424                 goto out;
2425         if (ret) {
2426                 if (!path->slots[0]) {
2427                         ret = -ENOENT;
2428                         goto out;
2429                 }
2430                 path->slots[0]--;
2431                 ret = 0;
2432         }
2433         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2434         if (key.objectid != rec->ino) {
2435                 ret = -ENOENT;
2436                 goto out;
2437         }
2438
2439         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2440                             struct btrfs_inode_item);
2441         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2442         btrfs_mark_buffer_dirty(path->nodes[0]);
2443         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2444         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2445                root->root_key.objectid);
2446 out:
2447         btrfs_release_path(path);
2448         return ret;
2449 }
2450
2451 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2452                                     struct btrfs_root *root,
2453                                     struct btrfs_path *path,
2454                                     struct inode_record *rec)
2455 {
2456         int ret;
2457
2458         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2459         btrfs_release_path(path);
2460         if (!ret)
2461                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2462         return ret;
2463 }
2464
2465 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2466                                struct btrfs_root *root,
2467                                struct btrfs_path *path,
2468                                struct inode_record *rec)
2469 {
2470         struct btrfs_inode_item *ei;
2471         struct btrfs_key key;
2472         int ret = 0;
2473
2474         key.objectid = rec->ino;
2475         key.type = BTRFS_INODE_ITEM_KEY;
2476         key.offset = 0;
2477
2478         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2479         if (ret) {
2480                 if (ret > 0)
2481                         ret = -ENOENT;
2482                 goto out;
2483         }
2484
2485         /* Since ret == 0, no need to check anything */
2486         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2487                             struct btrfs_inode_item);
2488         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2489         btrfs_mark_buffer_dirty(path->nodes[0]);
2490         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2491         printf("reset nbytes for ino %llu root %llu\n",
2492                rec->ino, root->root_key.objectid);
2493 out:
2494         btrfs_release_path(path);
2495         return ret;
2496 }
2497
2498 static int add_missing_dir_index(struct btrfs_root *root,
2499                                  struct cache_tree *inode_cache,
2500                                  struct inode_record *rec,
2501                                  struct inode_backref *backref)
2502 {
2503         struct btrfs_path path;
2504         struct btrfs_trans_handle *trans;
2505         struct btrfs_dir_item *dir_item;
2506         struct extent_buffer *leaf;
2507         struct btrfs_key key;
2508         struct btrfs_disk_key disk_key;
2509         struct inode_record *dir_rec;
2510         unsigned long name_ptr;
2511         u32 data_size = sizeof(*dir_item) + backref->namelen;
2512         int ret;
2513
2514         trans = btrfs_start_transaction(root, 1);
2515         if (IS_ERR(trans))
2516                 return PTR_ERR(trans);
2517
2518         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2519                 (unsigned long long)rec->ino);
2520
2521         btrfs_init_path(&path);
2522         key.objectid = backref->dir;
2523         key.type = BTRFS_DIR_INDEX_KEY;
2524         key.offset = backref->index;
2525         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2526         BUG_ON(ret);
2527
2528         leaf = path.nodes[0];
2529         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2530
2531         disk_key.objectid = cpu_to_le64(rec->ino);
2532         disk_key.type = BTRFS_INODE_ITEM_KEY;
2533         disk_key.offset = 0;
2534
2535         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2536         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2537         btrfs_set_dir_data_len(leaf, dir_item, 0);
2538         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2539         name_ptr = (unsigned long)(dir_item + 1);
2540         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2541         btrfs_mark_buffer_dirty(leaf);
2542         btrfs_release_path(&path);
2543         btrfs_commit_transaction(trans, root);
2544
2545         backref->found_dir_index = 1;
2546         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2547         BUG_ON(IS_ERR(dir_rec));
2548         if (!dir_rec)
2549                 return 0;
2550         dir_rec->found_size += backref->namelen;
2551         if (dir_rec->found_size == dir_rec->isize &&
2552             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2553                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2554         if (dir_rec->found_size != dir_rec->isize)
2555                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2556
2557         return 0;
2558 }
2559
2560 static int delete_dir_index(struct btrfs_root *root,
2561                             struct inode_backref *backref)
2562 {
2563         struct btrfs_trans_handle *trans;
2564         struct btrfs_dir_item *di;
2565         struct btrfs_path path;
2566         int ret = 0;
2567
2568         trans = btrfs_start_transaction(root, 1);
2569         if (IS_ERR(trans))
2570                 return PTR_ERR(trans);
2571
2572         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2573                 (unsigned long long)backref->dir,
2574                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2575                 (unsigned long long)root->objectid);
2576
2577         btrfs_init_path(&path);
2578         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2579                                     backref->name, backref->namelen,
2580                                     backref->index, -1);
2581         if (IS_ERR(di)) {
2582                 ret = PTR_ERR(di);
2583                 btrfs_release_path(&path);
2584                 btrfs_commit_transaction(trans, root);
2585                 if (ret == -ENOENT)
2586                         return 0;
2587                 return ret;
2588         }
2589
2590         if (!di)
2591                 ret = btrfs_del_item(trans, root, &path);
2592         else
2593                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2594         BUG_ON(ret);
2595         btrfs_release_path(&path);
2596         btrfs_commit_transaction(trans, root);
2597         return ret;
2598 }
2599
2600 static int create_inode_item(struct btrfs_root *root,
2601                              struct inode_record *rec,
2602                              int root_dir)
2603 {
2604         struct btrfs_trans_handle *trans;
2605         struct btrfs_inode_item inode_item;
2606         time_t now = time(NULL);
2607         int ret;
2608
2609         trans = btrfs_start_transaction(root, 1);
2610         if (IS_ERR(trans)) {
2611                 ret = PTR_ERR(trans);
2612                 return ret;
2613         }
2614
2615         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2616                 "be incomplete, please check permissions and content after "
2617                 "the fsck completes.\n", (unsigned long long)root->objectid,
2618                 (unsigned long long)rec->ino);
2619
2620         memset(&inode_item, 0, sizeof(inode_item));
2621         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2622         if (root_dir)
2623                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2624         else
2625                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2626         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2627         if (rec->found_dir_item) {
2628                 if (rec->found_file_extent)
2629                         fprintf(stderr, "root %llu inode %llu has both a dir "
2630                                 "item and extents, unsure if it is a dir or a "
2631                                 "regular file so setting it as a directory\n",
2632                                 (unsigned long long)root->objectid,
2633                                 (unsigned long long)rec->ino);
2634                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2635                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2636         } else if (!rec->found_dir_item) {
2637                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2638                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2639         }
2640         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2641         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2642         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2643         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2644         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2645         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2646         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2647         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2648
2649         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2650         BUG_ON(ret);
2651         btrfs_commit_transaction(trans, root);
2652         return 0;
2653 }
2654
2655 static int repair_inode_backrefs(struct btrfs_root *root,
2656                                  struct inode_record *rec,
2657                                  struct cache_tree *inode_cache,
2658                                  int delete)
2659 {
2660         struct inode_backref *tmp, *backref;
2661         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2662         int ret = 0;
2663         int repaired = 0;
2664
2665         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2666                 if (!delete && rec->ino == root_dirid) {
2667                         if (!rec->found_inode_item) {
2668                                 ret = create_inode_item(root, rec, 1);
2669                                 if (ret)
2670                                         break;
2671                                 repaired++;
2672                         }
2673                 }
2674
2675                 /* Index 0 for root dir's are special, don't mess with it */
2676                 if (rec->ino == root_dirid && backref->index == 0)
2677                         continue;
2678
2679                 if (delete &&
2680                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2681                      (backref->found_dir_index && backref->found_inode_ref &&
2682                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2683                         ret = delete_dir_index(root, backref);
2684                         if (ret)
2685                                 break;
2686                         repaired++;
2687                         list_del(&backref->list);
2688                         free(backref);
2689                         continue;
2690                 }
2691
2692                 if (!delete && !backref->found_dir_index &&
2693                     backref->found_dir_item && backref->found_inode_ref) {
2694                         ret = add_missing_dir_index(root, inode_cache, rec,
2695                                                     backref);
2696                         if (ret)
2697                                 break;
2698                         repaired++;
2699                         if (backref->found_dir_item &&
2700                             backref->found_dir_index) {
2701                                 if (!backref->errors &&
2702                                     backref->found_inode_ref) {
2703                                         list_del(&backref->list);
2704                                         free(backref);
2705                                         continue;
2706                                 }
2707                         }
2708                 }
2709
2710                 if (!delete && (!backref->found_dir_index &&
2711                                 !backref->found_dir_item &&
2712                                 backref->found_inode_ref)) {
2713                         struct btrfs_trans_handle *trans;
2714                         struct btrfs_key location;
2715
2716                         ret = check_dir_conflict(root, backref->name,
2717                                                  backref->namelen,
2718                                                  backref->dir,
2719                                                  backref->index);
2720                         if (ret) {
2721                                 /*
2722                                  * let nlink fixing routine to handle it,
2723                                  * which can do it better.
2724                                  */
2725                                 ret = 0;
2726                                 break;
2727                         }
2728                         location.objectid = rec->ino;
2729                         location.type = BTRFS_INODE_ITEM_KEY;
2730                         location.offset = 0;
2731
2732                         trans = btrfs_start_transaction(root, 1);
2733                         if (IS_ERR(trans)) {
2734                                 ret = PTR_ERR(trans);
2735                                 break;
2736                         }
2737                         fprintf(stderr, "adding missing dir index/item pair "
2738                                 "for inode %llu\n",
2739                                 (unsigned long long)rec->ino);
2740                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2741                                                     backref->namelen,
2742                                                     backref->dir, &location,
2743                                                     imode_to_type(rec->imode),
2744                                                     backref->index);
2745                         BUG_ON(ret);
2746                         btrfs_commit_transaction(trans, root);
2747                         repaired++;
2748                 }
2749
2750                 if (!delete && (backref->found_inode_ref &&
2751                                 backref->found_dir_index &&
2752                                 backref->found_dir_item &&
2753                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2754                                 !rec->found_inode_item)) {
2755                         ret = create_inode_item(root, rec, 0);
2756                         if (ret)
2757                                 break;
2758                         repaired++;
2759                 }
2760
2761         }
2762         return ret ? ret : repaired;
2763 }
2764
2765 /*
2766  * To determine the file type for nlink/inode_item repair
2767  *
2768  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2769  * Return -ENOENT if file type is not found.
2770  */
2771 static int find_file_type(struct inode_record *rec, u8 *type)
2772 {
2773         struct inode_backref *backref;
2774
2775         /* For inode item recovered case */
2776         if (rec->found_inode_item) {
2777                 *type = imode_to_type(rec->imode);
2778                 return 0;
2779         }
2780
2781         list_for_each_entry(backref, &rec->backrefs, list) {
2782                 if (backref->found_dir_index || backref->found_dir_item) {
2783                         *type = backref->filetype;
2784                         return 0;
2785                 }
2786         }
2787         return -ENOENT;
2788 }
2789
2790 /*
2791  * To determine the file name for nlink repair
2792  *
2793  * Return 0 if file name is found, set name and namelen.
2794  * Return -ENOENT if file name is not found.
2795  */
2796 static int find_file_name(struct inode_record *rec,
2797                           char *name, int *namelen)
2798 {
2799         struct inode_backref *backref;
2800
2801         list_for_each_entry(backref, &rec->backrefs, list) {
2802                 if (backref->found_dir_index || backref->found_dir_item ||
2803                     backref->found_inode_ref) {
2804                         memcpy(name, backref->name, backref->namelen);
2805                         *namelen = backref->namelen;
2806                         return 0;
2807                 }
2808         }
2809         return -ENOENT;
2810 }
2811
2812 /* Reset the nlink of the inode to the correct one */
2813 static int reset_nlink(struct btrfs_trans_handle *trans,
2814                        struct btrfs_root *root,
2815                        struct btrfs_path *path,
2816                        struct inode_record *rec)
2817 {
2818         struct inode_backref *backref;
2819         struct inode_backref *tmp;
2820         struct btrfs_key key;
2821         struct btrfs_inode_item *inode_item;
2822         int ret = 0;
2823
2824         /* We don't believe this either, reset it and iterate backref */
2825         rec->found_link = 0;
2826
2827         /* Remove all backref including the valid ones */
2828         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2829                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2830                                    backref->index, backref->name,
2831                                    backref->namelen, 0);
2832                 if (ret < 0)
2833                         goto out;
2834
2835                 /* remove invalid backref, so it won't be added back */
2836                 if (!(backref->found_dir_index &&
2837                       backref->found_dir_item &&
2838                       backref->found_inode_ref)) {
2839                         list_del(&backref->list);
2840                         free(backref);
2841                 } else {
2842                         rec->found_link++;
2843                 }
2844         }
2845
2846         /* Set nlink to 0 */
2847         key.objectid = rec->ino;
2848         key.type = BTRFS_INODE_ITEM_KEY;
2849         key.offset = 0;
2850         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2851         if (ret < 0)
2852                 goto out;
2853         if (ret > 0) {
2854                 ret = -ENOENT;
2855                 goto out;
2856         }
2857         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2858                                     struct btrfs_inode_item);
2859         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2860         btrfs_mark_buffer_dirty(path->nodes[0]);
2861         btrfs_release_path(path);
2862
2863         /*
2864          * Add back valid inode_ref/dir_item/dir_index,
2865          * add_link() will handle the nlink inc, so new nlink must be correct
2866          */
2867         list_for_each_entry(backref, &rec->backrefs, list) {
2868                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2869                                      backref->name, backref->namelen,
2870                                      backref->filetype, &backref->index, 1);
2871                 if (ret < 0)
2872                         goto out;
2873         }
2874 out:
2875         btrfs_release_path(path);
2876         return ret;
2877 }
2878
2879 static int get_highest_inode(struct btrfs_trans_handle *trans,
2880                                 struct btrfs_root *root,
2881                                 struct btrfs_path *path,
2882                                 u64 *highest_ino)
2883 {
2884         struct btrfs_key key, found_key;
2885         int ret;
2886
2887         btrfs_init_path(path);
2888         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2889         key.offset = -1;
2890         key.type = BTRFS_INODE_ITEM_KEY;
2891         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2892         if (ret == 1) {
2893                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2894                                 path->slots[0] - 1);
2895                 *highest_ino = found_key.objectid;
2896                 ret = 0;
2897         }
2898         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2899                 ret = -EOVERFLOW;
2900         btrfs_release_path(path);
2901         return ret;
2902 }
2903
2904 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2905                                struct btrfs_root *root,
2906                                struct btrfs_path *path,
2907                                struct inode_record *rec)
2908 {
2909         char *dir_name = "lost+found";
2910         char namebuf[BTRFS_NAME_LEN] = {0};
2911         u64 lost_found_ino;
2912         u32 mode = 0700;
2913         u8 type = 0;
2914         int namelen = 0;
2915         int name_recovered = 0;
2916         int type_recovered = 0;
2917         int ret = 0;
2918
2919         /*
2920          * Get file name and type first before these invalid inode ref
2921          * are deleted by remove_all_invalid_backref()
2922          */
2923         name_recovered = !find_file_name(rec, namebuf, &namelen);
2924         type_recovered = !find_file_type(rec, &type);
2925
2926         if (!name_recovered) {
2927                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2928                        rec->ino, rec->ino);
2929                 namelen = count_digits(rec->ino);
2930                 sprintf(namebuf, "%llu", rec->ino);
2931                 name_recovered = 1;
2932         }
2933         if (!type_recovered) {
2934                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2935                        rec->ino);
2936                 type = BTRFS_FT_REG_FILE;
2937                 type_recovered = 1;
2938         }
2939
2940         ret = reset_nlink(trans, root, path, rec);
2941         if (ret < 0) {
2942                 fprintf(stderr,
2943                         "Failed to reset nlink for inode %llu: %s\n",
2944                         rec->ino, strerror(-ret));
2945                 goto out;
2946         }
2947
2948         if (rec->found_link == 0) {
2949                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2950                 if (ret < 0)
2951                         goto out;
2952                 lost_found_ino++;
2953                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2954                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2955                                   mode);
2956                 if (ret < 0) {
2957                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2958                                 dir_name, strerror(-ret));
2959                         goto out;
2960                 }
2961                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2962                                      namebuf, namelen, type, NULL, 1);
2963                 /*
2964                  * Add ".INO" suffix several times to handle case where
2965                  * "FILENAME.INO" is already taken by another file.
2966                  */
2967                 while (ret == -EEXIST) {
2968                         /*
2969                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2970                          */
2971                         if (namelen + count_digits(rec->ino) + 1 >
2972                             BTRFS_NAME_LEN) {
2973                                 ret = -EFBIG;
2974                                 goto out;
2975                         }
2976                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2977                                  ".%llu", rec->ino);
2978                         namelen += count_digits(rec->ino) + 1;
2979                         ret = btrfs_add_link(trans, root, rec->ino,
2980                                              lost_found_ino, namebuf,
2981                                              namelen, type, NULL, 1);
2982                 }
2983                 if (ret < 0) {
2984                         fprintf(stderr,
2985                                 "Failed to link the inode %llu to %s dir: %s\n",
2986                                 rec->ino, dir_name, strerror(-ret));
2987                         goto out;
2988                 }
2989                 /*
2990                  * Just increase the found_link, don't actually add the
2991                  * backref. This will make things easier and this inode
2992                  * record will be freed after the repair is done.
2993                  * So fsck will not report problem about this inode.
2994                  */
2995                 rec->found_link++;
2996                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2997                        namelen, namebuf, dir_name);
2998         }
2999         printf("Fixed the nlink of inode %llu\n", rec->ino);
3000 out:
3001         /*
3002          * Clear the flag anyway, or we will loop forever for the same inode
3003          * as it will not be removed from the bad inode list and the dead loop
3004          * happens.
3005          */
3006         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3007         btrfs_release_path(path);
3008         return ret;
3009 }
3010
3011 /*
3012  * Check if there is any normal(reg or prealloc) file extent for given
3013  * ino.
3014  * This is used to determine the file type when neither its dir_index/item or
3015  * inode_item exists.
3016  *
3017  * This will *NOT* report error, if any error happens, just consider it does
3018  * not have any normal file extent.
3019  */
3020 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3021 {
3022         struct btrfs_path path;
3023         struct btrfs_key key;
3024         struct btrfs_key found_key;
3025         struct btrfs_file_extent_item *fi;
3026         u8 type;
3027         int ret = 0;
3028
3029         btrfs_init_path(&path);
3030         key.objectid = ino;
3031         key.type = BTRFS_EXTENT_DATA_KEY;
3032         key.offset = 0;
3033
3034         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3035         if (ret < 0) {
3036                 ret = 0;
3037                 goto out;
3038         }
3039         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3040                 ret = btrfs_next_leaf(root, &path);
3041                 if (ret) {
3042                         ret = 0;
3043                         goto out;
3044                 }
3045         }
3046         while (1) {
3047                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3048                                       path.slots[0]);
3049                 if (found_key.objectid != ino ||
3050                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3051                         break;
3052                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3053                                     struct btrfs_file_extent_item);
3054                 type = btrfs_file_extent_type(path.nodes[0], fi);
3055                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3056                         ret = 1;
3057                         goto out;
3058                 }
3059         }
3060 out:
3061         btrfs_release_path(&path);
3062         return ret;
3063 }
3064
3065 static u32 btrfs_type_to_imode(u8 type)
3066 {
3067         static u32 imode_by_btrfs_type[] = {
3068                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3069                 [BTRFS_FT_DIR]          = S_IFDIR,
3070                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3071                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3072                 [BTRFS_FT_FIFO]         = S_IFIFO,
3073                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3074                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3075         };
3076
3077         return imode_by_btrfs_type[(type)];
3078 }
3079
3080 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3081                                 struct btrfs_root *root,
3082                                 struct btrfs_path *path,
3083                                 struct inode_record *rec)
3084 {
3085         u8 filetype;
3086         u32 mode = 0700;
3087         int type_recovered = 0;
3088         int ret = 0;
3089
3090         printf("Trying to rebuild inode:%llu\n", rec->ino);
3091
3092         type_recovered = !find_file_type(rec, &filetype);
3093
3094         /*
3095          * Try to determine inode type if type not found.
3096          *
3097          * For found regular file extent, it must be FILE.
3098          * For found dir_item/index, it must be DIR.
3099          *
3100          * For undetermined one, use FILE as fallback.
3101          *
3102          * TODO:
3103          * 1. If found backref(inode_index/item is already handled) to it,
3104          *    it must be DIR.
3105          *    Need new inode-inode ref structure to allow search for that.
3106          */
3107         if (!type_recovered) {
3108                 if (rec->found_file_extent &&
3109                     find_normal_file_extent(root, rec->ino)) {
3110                         type_recovered = 1;
3111                         filetype = BTRFS_FT_REG_FILE;
3112                 } else if (rec->found_dir_item) {
3113                         type_recovered = 1;
3114                         filetype = BTRFS_FT_DIR;
3115                 } else if (!list_empty(&rec->orphan_extents)) {
3116                         type_recovered = 1;
3117                         filetype = BTRFS_FT_REG_FILE;
3118                 } else{
3119                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3120                                rec->ino);
3121                         type_recovered = 1;
3122                         filetype = BTRFS_FT_REG_FILE;
3123                 }
3124         }
3125
3126         ret = btrfs_new_inode(trans, root, rec->ino,
3127                               mode | btrfs_type_to_imode(filetype));
3128         if (ret < 0)
3129                 goto out;
3130
3131         /*
3132          * Here inode rebuild is done, we only rebuild the inode item,
3133          * don't repair the nlink(like move to lost+found).
3134          * That is the job of nlink repair.
3135          *
3136          * We just fill the record and return
3137          */
3138         rec->found_dir_item = 1;
3139         rec->imode = mode | btrfs_type_to_imode(filetype);
3140         rec->nlink = 0;
3141         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3142         /* Ensure the inode_nlinks repair function will be called */
3143         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3144 out:
3145         return ret;
3146 }
3147
3148 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3149                                       struct btrfs_root *root,
3150                                       struct btrfs_path *path,
3151                                       struct inode_record *rec)
3152 {
3153         struct orphan_data_extent *orphan;
3154         struct orphan_data_extent *tmp;
3155         int ret = 0;
3156
3157         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3158                 /*
3159                  * Check for conflicting file extents
3160                  *
3161                  * Here we don't know whether the extents is compressed or not,
3162                  * so we can only assume it not compressed nor data offset,
3163                  * and use its disk_len as extent length.
3164                  */
3165                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3166                                        orphan->offset, orphan->disk_len, 0);
3167                 btrfs_release_path(path);
3168                 if (ret < 0)
3169                         goto out;
3170                 if (!ret) {
3171                         fprintf(stderr,
3172                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3173                                 orphan->disk_bytenr, orphan->disk_len);
3174                         ret = btrfs_free_extent(trans,
3175                                         root->fs_info->extent_root,
3176                                         orphan->disk_bytenr, orphan->disk_len,
3177                                         0, root->objectid, orphan->objectid,
3178                                         orphan->offset);
3179                         if (ret < 0)
3180                                 goto out;
3181                 }
3182                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3183                                 orphan->offset, orphan->disk_bytenr,
3184                                 orphan->disk_len, orphan->disk_len);
3185                 if (ret < 0)
3186                         goto out;
3187
3188                 /* Update file size info */
3189                 rec->found_size += orphan->disk_len;
3190                 if (rec->found_size == rec->nbytes)
3191                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3192
3193                 /* Update the file extent hole info too */
3194                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3195                                            orphan->disk_len);
3196                 if (ret < 0)
3197                         goto out;
3198                 if (RB_EMPTY_ROOT(&rec->holes))
3199                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3200
3201                 list_del(&orphan->list);
3202                 free(orphan);
3203         }
3204         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3205 out:
3206         return ret;
3207 }
3208
3209 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3210                                         struct btrfs_root *root,
3211                                         struct btrfs_path *path,
3212                                         struct inode_record *rec)
3213 {
3214         struct rb_node *node;
3215         struct file_extent_hole *hole;
3216         int found = 0;
3217         int ret = 0;
3218
3219         node = rb_first(&rec->holes);
3220
3221         while (node) {
3222                 found = 1;
3223                 hole = rb_entry(node, struct file_extent_hole, node);
3224                 ret = btrfs_punch_hole(trans, root, rec->ino,
3225                                        hole->start, hole->len);
3226                 if (ret < 0)
3227                         goto out;
3228                 ret = del_file_extent_hole(&rec->holes, hole->start,
3229                                            hole->len);
3230                 if (ret < 0)
3231                         goto out;
3232                 if (RB_EMPTY_ROOT(&rec->holes))
3233                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3234                 node = rb_first(&rec->holes);
3235         }
3236         /* special case for a file losing all its file extent */
3237         if (!found) {
3238                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3239                                        round_up(rec->isize,
3240                                                 root->fs_info->sectorsize));
3241                 if (ret < 0)
3242                         goto out;
3243         }
3244         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3245                rec->ino, root->objectid);
3246 out:
3247         return ret;
3248 }
3249
3250 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3251 {
3252         struct btrfs_trans_handle *trans;
3253         struct btrfs_path path;
3254         int ret = 0;
3255
3256         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3257                              I_ERR_NO_ORPHAN_ITEM |
3258                              I_ERR_LINK_COUNT_WRONG |
3259                              I_ERR_NO_INODE_ITEM |
3260                              I_ERR_FILE_EXTENT_ORPHAN |
3261                              I_ERR_FILE_EXTENT_DISCOUNT|
3262                              I_ERR_FILE_NBYTES_WRONG)))
3263                 return rec->errors;
3264
3265         /*
3266          * For nlink repair, it may create a dir and add link, so
3267          * 2 for parent(256)'s dir_index and dir_item
3268          * 2 for lost+found dir's inode_item and inode_ref
3269          * 1 for the new inode_ref of the file
3270          * 2 for lost+found dir's dir_index and dir_item for the file
3271          */
3272         trans = btrfs_start_transaction(root, 7);
3273         if (IS_ERR(trans))
3274                 return PTR_ERR(trans);
3275
3276         btrfs_init_path(&path);
3277         if (rec->errors & I_ERR_NO_INODE_ITEM)
3278                 ret = repair_inode_no_item(trans, root, &path, rec);
3279         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3280                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3281         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3282                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3283         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3284                 ret = repair_inode_isize(trans, root, &path, rec);
3285         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3286                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3287         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3288                 ret = repair_inode_nlinks(trans, root, &path, rec);
3289         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3290                 ret = repair_inode_nbytes(trans, root, &path, rec);
3291         btrfs_commit_transaction(trans, root);
3292         btrfs_release_path(&path);
3293         return ret;
3294 }
3295
3296 static int check_inode_recs(struct btrfs_root *root,
3297                             struct cache_tree *inode_cache)
3298 {
3299         struct cache_extent *cache;
3300         struct ptr_node *node;
3301         struct inode_record *rec;
3302         struct inode_backref *backref;
3303         int stage = 0;
3304         int ret = 0;
3305         int err = 0;
3306         u64 error = 0;
3307         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3308
3309         if (btrfs_root_refs(&root->root_item) == 0) {
3310                 if (!cache_tree_empty(inode_cache))
3311                         fprintf(stderr, "warning line %d\n", __LINE__);
3312                 return 0;
3313         }
3314
3315         /*
3316          * We need to repair backrefs first because we could change some of the
3317          * errors in the inode recs.
3318          *
3319          * We also need to go through and delete invalid backrefs first and then
3320          * add the correct ones second.  We do this because we may get EEXIST
3321          * when adding back the correct index because we hadn't yet deleted the
3322          * invalid index.
3323          *
3324          * For example, if we were missing a dir index then the directories
3325          * isize would be wrong, so if we fixed the isize to what we thought it
3326          * would be and then fixed the backref we'd still have a invalid fs, so
3327          * we need to add back the dir index and then check to see if the isize
3328          * is still wrong.
3329          */
3330         while (stage < 3) {
3331                 stage++;
3332                 if (stage == 3 && !err)
3333                         break;
3334
3335                 cache = search_cache_extent(inode_cache, 0);
3336                 while (repair && cache) {
3337                         node = container_of(cache, struct ptr_node, cache);
3338                         rec = node->data;
3339                         cache = next_cache_extent(cache);
3340
3341                         /* Need to free everything up and rescan */
3342                         if (stage == 3) {
3343                                 remove_cache_extent(inode_cache, &node->cache);
3344                                 free(node);
3345                                 free_inode_rec(rec);
3346                                 continue;
3347                         }
3348
3349                         if (list_empty(&rec->backrefs))
3350                                 continue;
3351
3352                         ret = repair_inode_backrefs(root, rec, inode_cache,
3353                                                     stage == 1);
3354                         if (ret < 0) {
3355                                 err = ret;
3356                                 stage = 2;
3357                                 break;
3358                         } if (ret > 0) {
3359                                 err = -EAGAIN;
3360                         }
3361                 }
3362         }
3363         if (err)
3364                 return err;
3365
3366         rec = get_inode_rec(inode_cache, root_dirid, 0);
3367         BUG_ON(IS_ERR(rec));
3368         if (rec) {
3369                 ret = check_root_dir(rec);
3370                 if (ret) {
3371                         fprintf(stderr, "root %llu root dir %llu error\n",
3372                                 (unsigned long long)root->root_key.objectid,
3373                                 (unsigned long long)root_dirid);
3374                         print_inode_error(root, rec);
3375                         error++;
3376                 }
3377         } else {
3378                 if (repair) {
3379                         struct btrfs_trans_handle *trans;
3380
3381                         trans = btrfs_start_transaction(root, 1);
3382                         if (IS_ERR(trans)) {
3383                                 err = PTR_ERR(trans);
3384                                 return err;
3385                         }
3386
3387                         fprintf(stderr,
3388                                 "root %llu missing its root dir, recreating\n",
3389                                 (unsigned long long)root->objectid);
3390
3391                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3392                         BUG_ON(ret);
3393
3394                         btrfs_commit_transaction(trans, root);
3395                         return -EAGAIN;
3396                 }
3397
3398                 fprintf(stderr, "root %llu root dir %llu not found\n",
3399                         (unsigned long long)root->root_key.objectid,
3400                         (unsigned long long)root_dirid);
3401         }
3402
3403         while (1) {
3404                 cache = search_cache_extent(inode_cache, 0);
3405                 if (!cache)
3406                         break;
3407                 node = container_of(cache, struct ptr_node, cache);
3408                 rec = node->data;
3409                 remove_cache_extent(inode_cache, &node->cache);
3410                 free(node);
3411                 if (rec->ino == root_dirid ||
3412                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3413                         free_inode_rec(rec);
3414                         continue;
3415                 }
3416
3417                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3418                         ret = check_orphan_item(root, rec->ino);
3419                         if (ret == 0)
3420                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3421                         if (can_free_inode_rec(rec)) {
3422                                 free_inode_rec(rec);
3423                                 continue;
3424                         }
3425                 }
3426
3427                 if (!rec->found_inode_item)
3428                         rec->errors |= I_ERR_NO_INODE_ITEM;
3429                 if (rec->found_link != rec->nlink)
3430                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3431                 if (repair) {
3432                         ret = try_repair_inode(root, rec);
3433                         if (ret == 0 && can_free_inode_rec(rec)) {
3434                                 free_inode_rec(rec);
3435                                 continue;
3436                         }
3437                         ret = 0;
3438                 }
3439
3440                 if (!(repair && ret == 0))
3441                         error++;
3442                 print_inode_error(root, rec);
3443                 list_for_each_entry(backref, &rec->backrefs, list) {
3444                         if (!backref->found_dir_item)
3445                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3446                         if (!backref->found_dir_index)
3447                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3448                         if (!backref->found_inode_ref)
3449                                 backref->errors |= REF_ERR_NO_INODE_REF;
3450                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3451                                 " namelen %u name %s filetype %d errors %x",
3452                                 (unsigned long long)backref->dir,
3453                                 (unsigned long long)backref->index,
3454                                 backref->namelen, backref->name,
3455                                 backref->filetype, backref->errors);
3456                         print_ref_error(backref->errors);
3457                 }
3458                 free_inode_rec(rec);
3459         }
3460         return (error > 0) ? -1 : 0;
3461 }
3462
3463 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3464                                         u64 objectid)
3465 {
3466         struct cache_extent *cache;
3467         struct root_record *rec = NULL;
3468         int ret;
3469
3470         cache = lookup_cache_extent(root_cache, objectid, 1);
3471         if (cache) {
3472                 rec = container_of(cache, struct root_record, cache);
3473         } else {
3474                 rec = calloc(1, sizeof(*rec));
3475                 if (!rec)
3476                         return ERR_PTR(-ENOMEM);
3477                 rec->objectid = objectid;
3478                 INIT_LIST_HEAD(&rec->backrefs);
3479                 rec->cache.start = objectid;
3480                 rec->cache.size = 1;
3481
3482                 ret = insert_cache_extent(root_cache, &rec->cache);
3483                 if (ret)
3484                         return ERR_PTR(-EEXIST);
3485         }
3486         return rec;
3487 }
3488
3489 static struct root_backref *get_root_backref(struct root_record *rec,
3490                                              u64 ref_root, u64 dir, u64 index,
3491                                              const char *name, int namelen)
3492 {
3493         struct root_backref *backref;
3494
3495         list_for_each_entry(backref, &rec->backrefs, list) {
3496                 if (backref->ref_root != ref_root || backref->dir != dir ||
3497                     backref->namelen != namelen)
3498                         continue;
3499                 if (memcmp(name, backref->name, namelen))
3500                         continue;
3501                 return backref;
3502         }
3503
3504         backref = calloc(1, sizeof(*backref) + namelen + 1);
3505         if (!backref)
3506                 return NULL;
3507         backref->ref_root = ref_root;
3508         backref->dir = dir;
3509         backref->index = index;
3510         backref->namelen = namelen;
3511         memcpy(backref->name, name, namelen);
3512         backref->name[namelen] = '\0';
3513         list_add_tail(&backref->list, &rec->backrefs);
3514         return backref;
3515 }
3516
3517 static void free_root_record(struct cache_extent *cache)
3518 {
3519         struct root_record *rec;
3520         struct root_backref *backref;
3521
3522         rec = container_of(cache, struct root_record, cache);
3523         while (!list_empty(&rec->backrefs)) {
3524                 backref = to_root_backref(rec->backrefs.next);
3525                 list_del(&backref->list);
3526                 free(backref);
3527         }
3528
3529         free(rec);
3530 }
3531
3532 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3533
3534 static int add_root_backref(struct cache_tree *root_cache,
3535                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3536                             const char *name, int namelen,
3537                             int item_type, int errors)
3538 {
3539         struct root_record *rec;
3540         struct root_backref *backref;
3541
3542         rec = get_root_rec(root_cache, root_id);
3543         BUG_ON(IS_ERR(rec));
3544         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3545         BUG_ON(!backref);
3546
3547         backref->errors |= errors;
3548
3549         if (item_type != BTRFS_DIR_ITEM_KEY) {
3550                 if (backref->found_dir_index || backref->found_back_ref ||
3551                     backref->found_forward_ref) {
3552                         if (backref->index != index)
3553                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3554                 } else {
3555                         backref->index = index;
3556                 }
3557         }
3558
3559         if (item_type == BTRFS_DIR_ITEM_KEY) {
3560                 if (backref->found_forward_ref)
3561                         rec->found_ref++;
3562                 backref->found_dir_item = 1;
3563         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3564                 backref->found_dir_index = 1;
3565         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3566                 if (backref->found_forward_ref)
3567                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3568                 else if (backref->found_dir_item)
3569                         rec->found_ref++;
3570                 backref->found_forward_ref = 1;
3571         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3572                 if (backref->found_back_ref)
3573                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3574                 backref->found_back_ref = 1;
3575         } else {
3576                 BUG_ON(1);
3577         }
3578
3579         if (backref->found_forward_ref && backref->found_dir_item)
3580                 backref->reachable = 1;
3581         return 0;
3582 }
3583
3584 static int merge_root_recs(struct btrfs_root *root,
3585                            struct cache_tree *src_cache,
3586                            struct cache_tree *dst_cache)
3587 {
3588         struct cache_extent *cache;
3589         struct ptr_node *node;
3590         struct inode_record *rec;
3591         struct inode_backref *backref;
3592         int ret = 0;
3593
3594         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3595                 free_inode_recs_tree(src_cache);
3596                 return 0;
3597         }
3598
3599         while (1) {
3600                 cache = search_cache_extent(src_cache, 0);
3601                 if (!cache)
3602                         break;
3603                 node = container_of(cache, struct ptr_node, cache);
3604                 rec = node->data;
3605                 remove_cache_extent(src_cache, &node->cache);
3606                 free(node);
3607
3608                 ret = is_child_root(root, root->objectid, rec->ino);
3609                 if (ret < 0)
3610                         break;
3611                 else if (ret == 0)
3612                         goto skip;
3613
3614                 list_for_each_entry(backref, &rec->backrefs, list) {
3615                         BUG_ON(backref->found_inode_ref);
3616                         if (backref->found_dir_item)
3617                                 add_root_backref(dst_cache, rec->ino,
3618                                         root->root_key.objectid, backref->dir,
3619                                         backref->index, backref->name,
3620                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3621                                         backref->errors);
3622                         if (backref->found_dir_index)
3623                                 add_root_backref(dst_cache, rec->ino,
3624                                         root->root_key.objectid, backref->dir,
3625                                         backref->index, backref->name,
3626                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3627                                         backref->errors);
3628                 }
3629 skip:
3630                 free_inode_rec(rec);
3631         }
3632         if (ret < 0)
3633                 return ret;
3634         return 0;
3635 }
3636
3637 static int check_root_refs(struct btrfs_root *root,
3638                            struct cache_tree *root_cache)
3639 {
3640         struct root_record *rec;
3641         struct root_record *ref_root;
3642         struct root_backref *backref;
3643         struct cache_extent *cache;
3644         int loop = 1;
3645         int ret;
3646         int error;
3647         int errors = 0;
3648
3649         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3650         BUG_ON(IS_ERR(rec));
3651         rec->found_ref = 1;
3652
3653         /* fixme: this can not detect circular references */
3654         while (loop) {
3655                 loop = 0;
3656                 cache = search_cache_extent(root_cache, 0);
3657                 while (1) {
3658                         if (!cache)
3659                                 break;
3660                         rec = container_of(cache, struct root_record, cache);
3661                         cache = next_cache_extent(cache);
3662
3663                         if (rec->found_ref == 0)
3664                                 continue;
3665
3666                         list_for_each_entry(backref, &rec->backrefs, list) {
3667                                 if (!backref->reachable)
3668                                         continue;
3669
3670                                 ref_root = get_root_rec(root_cache,
3671                                                         backref->ref_root);
3672                                 BUG_ON(IS_ERR(ref_root));
3673                                 if (ref_root->found_ref > 0)
3674                                         continue;
3675
3676                                 backref->reachable = 0;
3677                                 rec->found_ref--;
3678                                 if (rec->found_ref == 0)
3679                                         loop = 1;
3680                         }
3681                 }
3682         }
3683
3684         cache = search_cache_extent(root_cache, 0);
3685         while (1) {
3686                 if (!cache)
3687                         break;
3688                 rec = container_of(cache, struct root_record, cache);
3689                 cache = next_cache_extent(cache);
3690
3691                 if (rec->found_ref == 0 &&
3692                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3693                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3694                         ret = check_orphan_item(root->fs_info->tree_root,
3695                                                 rec->objectid);
3696                         if (ret == 0)
3697                                 continue;
3698
3699                         /*
3700                          * If we don't have a root item then we likely just have
3701                          * a dir item in a snapshot for this root but no actual
3702                          * ref key or anything so it's meaningless.
3703                          */
3704                         if (!rec->found_root_item)
3705                                 continue;
3706                         errors++;
3707                         fprintf(stderr, "fs tree %llu not referenced\n",
3708                                 (unsigned long long)rec->objectid);
3709                 }
3710
3711                 error = 0;
3712                 if (rec->found_ref > 0 && !rec->found_root_item)
3713                         error = 1;
3714                 list_for_each_entry(backref, &rec->backrefs, list) {
3715                         if (!backref->found_dir_item)
3716                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3717                         if (!backref->found_dir_index)
3718                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3719                         if (!backref->found_back_ref)
3720                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3721                         if (!backref->found_forward_ref)
3722                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3723                         if (backref->reachable && backref->errors)
3724                                 error = 1;
3725                 }
3726                 if (!error)
3727                         continue;
3728
3729                 errors++;
3730                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3731                         (unsigned long long)rec->objectid, rec->found_ref,
3732                          rec->found_root_item ? "" : "not found");
3733
3734                 list_for_each_entry(backref, &rec->backrefs, list) {
3735                         if (!backref->reachable)
3736                                 continue;
3737                         if (!backref->errors && rec->found_root_item)
3738                                 continue;
3739                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3740                                 " index %llu namelen %u name %s errors %x\n",
3741                                 (unsigned long long)backref->ref_root,
3742                                 (unsigned long long)backref->dir,
3743                                 (unsigned long long)backref->index,
3744                                 backref->namelen, backref->name,
3745                                 backref->errors);
3746                         print_ref_error(backref->errors);
3747                 }
3748         }
3749         return errors > 0 ? 1 : 0;
3750 }
3751
3752 static int process_root_ref(struct extent_buffer *eb, int slot,
3753                             struct btrfs_key *key,
3754                             struct cache_tree *root_cache)
3755 {
3756         u64 dirid;
3757         u64 index;
3758         u32 len;
3759         u32 name_len;
3760         struct btrfs_root_ref *ref;
3761         char namebuf[BTRFS_NAME_LEN];
3762         int error;
3763
3764         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3765
3766         dirid = btrfs_root_ref_dirid(eb, ref);
3767         index = btrfs_root_ref_sequence(eb, ref);
3768         name_len = btrfs_root_ref_name_len(eb, ref);
3769
3770         if (name_len <= BTRFS_NAME_LEN) {
3771                 len = name_len;
3772                 error = 0;
3773         } else {
3774                 len = BTRFS_NAME_LEN;
3775                 error = REF_ERR_NAME_TOO_LONG;
3776         }
3777         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3778
3779         if (key->type == BTRFS_ROOT_REF_KEY) {
3780                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3781                                  index, namebuf, len, key->type, error);
3782         } else {
3783                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3784                                  index, namebuf, len, key->type, error);
3785         }
3786         return 0;
3787 }
3788
3789 static void free_corrupt_block(struct cache_extent *cache)
3790 {
3791         struct btrfs_corrupt_block *corrupt;
3792
3793         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3794         free(corrupt);
3795 }
3796
3797 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3798
3799 /*
3800  * Repair the btree of the given root.
3801  *
3802  * The fix is to remove the node key in corrupt_blocks cache_tree.
3803  * and rebalance the tree.
3804  * After the fix, the btree should be writeable.
3805  */
3806 static int repair_btree(struct btrfs_root *root,
3807                         struct cache_tree *corrupt_blocks)
3808 {
3809         struct btrfs_trans_handle *trans;
3810         struct btrfs_path path;
3811         struct btrfs_corrupt_block *corrupt;
3812         struct cache_extent *cache;
3813         struct btrfs_key key;
3814         u64 offset;
3815         int level;
3816         int ret = 0;
3817
3818         if (cache_tree_empty(corrupt_blocks))
3819                 return 0;
3820
3821         trans = btrfs_start_transaction(root, 1);
3822         if (IS_ERR(trans)) {
3823                 ret = PTR_ERR(trans);
3824                 fprintf(stderr, "Error starting transaction: %s\n",
3825                         strerror(-ret));
3826                 return ret;
3827         }
3828         btrfs_init_path(&path);
3829         cache = first_cache_extent(corrupt_blocks);
3830         while (cache) {
3831                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3832                                        cache);
3833                 level = corrupt->level;
3834                 path.lowest_level = level;
3835                 key.objectid = corrupt->key.objectid;
3836                 key.type = corrupt->key.type;
3837                 key.offset = corrupt->key.offset;
3838
3839                 /*
3840                  * Here we don't want to do any tree balance, since it may
3841                  * cause a balance with corrupted brother leaf/node,
3842                  * so ins_len set to 0 here.
3843                  * Balance will be done after all corrupt node/leaf is deleted.
3844                  */
3845                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 offset = btrfs_node_blockptr(path.nodes[level],
3849                                              path.slots[level]);
3850
3851                 /* Remove the ptr */
3852                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3853                 if (ret < 0)
3854                         goto out;
3855                 /*
3856                  * Remove the corresponding extent
3857                  * return value is not concerned.
3858                  */
3859                 btrfs_release_path(&path);
3860                 ret = btrfs_free_extent(trans, root, offset,
3861                                 root->fs_info->nodesize, 0,
3862                                 root->root_key.objectid, level - 1, 0);
3863                 cache = next_cache_extent(cache);
3864         }
3865
3866         /* Balance the btree using btrfs_search_slot() */
3867         cache = first_cache_extent(corrupt_blocks);
3868         while (cache) {
3869                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3870                                        cache);
3871                 memcpy(&key, &corrupt->key, sizeof(key));
3872                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3873                 if (ret < 0)
3874                         goto out;
3875                 /* return will always >0 since it won't find the item */
3876                 ret = 0;
3877                 btrfs_release_path(&path);
3878                 cache = next_cache_extent(cache);
3879         }
3880 out:
3881         btrfs_commit_transaction(trans, root);
3882         btrfs_release_path(&path);
3883         return ret;
3884 }
3885
3886 static int check_fs_root(struct btrfs_root *root,
3887                          struct cache_tree *root_cache,
3888                          struct walk_control *wc)
3889 {
3890         int ret = 0;
3891         int err = 0;
3892         int wret;
3893         int level;
3894         struct btrfs_path path;
3895         struct shared_node root_node;
3896         struct root_record *rec;
3897         struct btrfs_root_item *root_item = &root->root_item;
3898         struct cache_tree corrupt_blocks;
3899         struct orphan_data_extent *orphan;
3900         struct orphan_data_extent *tmp;
3901         enum btrfs_tree_block_status status;
3902         struct node_refs nrefs;
3903
3904         /*
3905          * Reuse the corrupt_block cache tree to record corrupted tree block
3906          *
3907          * Unlike the usage in extent tree check, here we do it in a per
3908          * fs/subvol tree base.
3909          */
3910         cache_tree_init(&corrupt_blocks);
3911         root->fs_info->corrupt_blocks = &corrupt_blocks;
3912
3913         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3914                 rec = get_root_rec(root_cache, root->root_key.objectid);
3915                 BUG_ON(IS_ERR(rec));
3916                 if (btrfs_root_refs(root_item) > 0)
3917                         rec->found_root_item = 1;
3918         }
3919
3920         btrfs_init_path(&path);
3921         memset(&root_node, 0, sizeof(root_node));
3922         cache_tree_init(&root_node.root_cache);
3923         cache_tree_init(&root_node.inode_cache);
3924         memset(&nrefs, 0, sizeof(nrefs));
3925
3926         /* Move the orphan extent record to corresponding inode_record */
3927         list_for_each_entry_safe(orphan, tmp,
3928                                  &root->orphan_data_extents, list) {
3929                 struct inode_record *inode;
3930
3931                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3932                                       1);
3933                 BUG_ON(IS_ERR(inode));
3934                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3935                 list_move(&orphan->list, &inode->orphan_extents);
3936         }
3937
3938         level = btrfs_header_level(root->node);
3939         memset(wc->nodes, 0, sizeof(wc->nodes));
3940         wc->nodes[level] = &root_node;
3941         wc->active_node = level;
3942         wc->root_level = level;
3943
3944         /* We may not have checked the root block, lets do that now */
3945         if (btrfs_is_leaf(root->node))
3946                 status = btrfs_check_leaf(root, NULL, root->node);
3947         else
3948                 status = btrfs_check_node(root, NULL, root->node);
3949         if (status != BTRFS_TREE_BLOCK_CLEAN)
3950                 return -EIO;
3951
3952         if (btrfs_root_refs(root_item) > 0 ||
3953             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3954                 path.nodes[level] = root->node;
3955                 extent_buffer_get(root->node);
3956                 path.slots[level] = 0;
3957         } else {
3958                 struct btrfs_key key;
3959                 struct btrfs_disk_key found_key;
3960
3961                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3962                 level = root_item->drop_level;
3963                 path.lowest_level = level;
3964                 if (level > btrfs_header_level(root->node) ||
3965                     level >= BTRFS_MAX_LEVEL) {
3966                         error("ignoring invalid drop level: %u", level);
3967                         goto skip_walking;
3968                 }
3969                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3970                 if (wret < 0)
3971                         goto skip_walking;
3972                 btrfs_node_key(path.nodes[level], &found_key,
3973                                 path.slots[level]);
3974                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3975                                         sizeof(found_key)));
3976         }
3977
3978         while (1) {
3979                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3980                 if (wret < 0)
3981                         ret = wret;
3982                 if (wret != 0)
3983                         break;
3984
3985                 wret = walk_up_tree(root, &path, wc, &level);
3986                 if (wret < 0)
3987                         ret = wret;
3988                 if (wret != 0)
3989                         break;
3990         }
3991 skip_walking:
3992         btrfs_release_path(&path);
3993
3994         if (!cache_tree_empty(&corrupt_blocks)) {
3995                 struct cache_extent *cache;
3996                 struct btrfs_corrupt_block *corrupt;
3997
3998                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3999                        root->root_key.objectid);
4000                 cache = first_cache_extent(&corrupt_blocks);
4001                 while (cache) {
4002                         corrupt = container_of(cache,
4003                                                struct btrfs_corrupt_block,
4004                                                cache);
4005                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4006                                cache->start, corrupt->level,
4007                                corrupt->key.objectid, corrupt->key.type,
4008                                corrupt->key.offset);
4009                         cache = next_cache_extent(cache);
4010                 }
4011                 if (repair) {
4012                         printf("Try to repair the btree for root %llu\n",
4013                                root->root_key.objectid);
4014                         ret = repair_btree(root, &corrupt_blocks);
4015                         if (ret < 0)
4016                                 fprintf(stderr, "Failed to repair btree: %s\n",
4017                                         strerror(-ret));
4018                         if (!ret)
4019                                 printf("Btree for root %llu is fixed\n",
4020                                        root->root_key.objectid);
4021                 }
4022         }
4023
4024         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4025         if (err < 0)
4026                 ret = err;
4027
4028         if (root_node.current) {
4029                 root_node.current->checked = 1;
4030                 maybe_free_inode_rec(&root_node.inode_cache,
4031                                 root_node.current);
4032         }
4033
4034         err = check_inode_recs(root, &root_node.inode_cache);
4035         if (!ret)
4036                 ret = err;
4037
4038         free_corrupt_blocks_tree(&corrupt_blocks);
4039         root->fs_info->corrupt_blocks = NULL;
4040         free_orphan_data_extents(&root->orphan_data_extents);
4041         return ret;
4042 }
4043
4044 static int fs_root_objectid(u64 objectid)
4045 {
4046         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4047             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4048                 return 1;
4049         return is_fstree(objectid);
4050 }
4051
4052 static int check_fs_roots(struct btrfs_root *root,
4053                           struct cache_tree *root_cache)
4054 {
4055         struct btrfs_path path;
4056         struct btrfs_key key;
4057         struct walk_control wc;
4058         struct extent_buffer *leaf, *tree_node;
4059         struct btrfs_root *tmp_root;
4060         struct btrfs_root *tree_root = root->fs_info->tree_root;
4061         int ret;
4062         int err = 0;
4063
4064         if (ctx.progress_enabled) {
4065                 ctx.tp = TASK_FS_ROOTS;
4066                 task_start(ctx.info);
4067         }
4068
4069         /*
4070          * Just in case we made any changes to the extent tree that weren't
4071          * reflected into the free space cache yet.
4072          */
4073         if (repair)
4074                 reset_cached_block_groups(root->fs_info);
4075         memset(&wc, 0, sizeof(wc));
4076         cache_tree_init(&wc.shared);
4077         btrfs_init_path(&path);
4078
4079 again:
4080         key.offset = 0;
4081         key.objectid = 0;
4082         key.type = BTRFS_ROOT_ITEM_KEY;
4083         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4084         if (ret < 0) {
4085                 err = 1;
4086                 goto out;
4087         }
4088         tree_node = tree_root->node;
4089         while (1) {
4090                 if (tree_node != tree_root->node) {
4091                         free_root_recs_tree(root_cache);
4092                         btrfs_release_path(&path);
4093                         goto again;
4094                 }
4095                 leaf = path.nodes[0];
4096                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4097                         ret = btrfs_next_leaf(tree_root, &path);
4098                         if (ret) {
4099                                 if (ret < 0)
4100                                         err = 1;
4101                                 break;
4102                         }
4103                         leaf = path.nodes[0];
4104                 }
4105                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4106                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4107                     fs_root_objectid(key.objectid)) {
4108                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4109                                 tmp_root = btrfs_read_fs_root_no_cache(
4110                                                 root->fs_info, &key);
4111                         } else {
4112                                 key.offset = (u64)-1;
4113                                 tmp_root = btrfs_read_fs_root(
4114                                                 root->fs_info, &key);
4115                         }
4116                         if (IS_ERR(tmp_root)) {
4117                                 err = 1;
4118                                 goto next;
4119                         }
4120                         ret = check_fs_root(tmp_root, root_cache, &wc);
4121                         if (ret == -EAGAIN) {
4122                                 free_root_recs_tree(root_cache);
4123                                 btrfs_release_path(&path);
4124                                 goto again;
4125                         }
4126                         if (ret)
4127                                 err = 1;
4128                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4129                                 btrfs_free_fs_root(tmp_root);
4130                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4131                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4132                         process_root_ref(leaf, path.slots[0], &key,
4133                                          root_cache);
4134                 }
4135 next:
4136                 path.slots[0]++;
4137         }
4138 out:
4139         btrfs_release_path(&path);
4140         if (err)
4141                 free_extent_cache_tree(&wc.shared);
4142         if (!cache_tree_empty(&wc.shared))
4143                 fprintf(stderr, "warning line %d\n", __LINE__);
4144
4145         task_stop(ctx.info);
4146
4147         return err;
4148 }
4149
4150 /*
4151  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4152  * INODE_REF/INODE_EXTREF match.
4153  *
4154  * @root:       the root of the fs/file tree
4155  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4156  * @key:        the key of the DIR_ITEM/DIR_INDEX
4157  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4158  *              distinguish root_dir between normal dir/file
4159  * @name:       the name in the INODE_REF/INODE_EXTREF
4160  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4161  * @mode:       the st_mode of INODE_ITEM
4162  *
4163  * Return 0 if no error occurred.
4164  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4165  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4166  * dir/file.
4167  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4168  * not match for normal dir/file.
4169  */
4170 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4171                          struct btrfs_key *key, u64 index, char *name,
4172                          u32 namelen, u32 mode)
4173 {
4174         struct btrfs_path path;
4175         struct extent_buffer *node;
4176         struct btrfs_dir_item *di;
4177         struct btrfs_key location;
4178         char namebuf[BTRFS_NAME_LEN] = {0};
4179         u32 total;
4180         u32 cur = 0;
4181         u32 len;
4182         u32 name_len;
4183         u32 data_len;
4184         u8 filetype;
4185         int slot;
4186         int ret;
4187
4188         btrfs_init_path(&path);
4189         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4190         if (ret < 0) {
4191                 ret = DIR_ITEM_MISSING;
4192                 goto out;
4193         }
4194
4195         /* Process root dir and goto out*/
4196         if (index == 0) {
4197                 if (ret == 0) {
4198                         ret = ROOT_DIR_ERROR;
4199                         error(
4200                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4201                                 root->objectid,
4202                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4203                                         "REF" : "EXTREF",
4204                                 ref_key->objectid, ref_key->offset,
4205                                 key->type == BTRFS_DIR_ITEM_KEY ?
4206                                         "DIR_ITEM" : "DIR_INDEX");
4207                 } else {
4208                         ret = 0;
4209                 }
4210
4211                 goto out;
4212         }
4213
4214         /* Process normal file/dir */
4215         if (ret > 0) {
4216                 ret = DIR_ITEM_MISSING;
4217                 error(
4218                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4219                         root->objectid,
4220                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4221                         ref_key->objectid, ref_key->offset,
4222                         key->type == BTRFS_DIR_ITEM_KEY ?
4223                                 "DIR_ITEM" : "DIR_INDEX",
4224                         key->objectid, key->offset, namelen, name,
4225                         imode_to_type(mode));
4226                 goto out;
4227         }
4228
4229         /* Check whether inode_id/filetype/name match */
4230         node = path.nodes[0];
4231         slot = path.slots[0];
4232         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4233         total = btrfs_item_size_nr(node, slot);
4234         while (cur < total) {
4235                 ret = DIR_ITEM_MISMATCH;
4236                 name_len = btrfs_dir_name_len(node, di);
4237                 data_len = btrfs_dir_data_len(node, di);
4238
4239                 btrfs_dir_item_key_to_cpu(node, di, &location);
4240                 if (location.objectid != ref_key->objectid ||
4241                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4242                     location.offset != 0)
4243                         goto next;
4244
4245                 filetype = btrfs_dir_type(node, di);
4246                 if (imode_to_type(mode) != filetype)
4247                         goto next;
4248
4249                 if (cur + sizeof(*di) + name_len > total ||
4250                     name_len > BTRFS_NAME_LEN) {
4251                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4252                                 root->objectid,
4253                                 key->type == BTRFS_DIR_ITEM_KEY ?
4254                                 "DIR_ITEM" : "DIR_INDEX",
4255                                 key->objectid, key->offset, name_len);
4256
4257                         if (cur + sizeof(*di) > total)
4258                                 break;
4259                         len = min_t(u32, total - cur - sizeof(*di),
4260                                     BTRFS_NAME_LEN);
4261                 } else {
4262                         len = name_len;
4263                 }
4264
4265                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4266                 if (len != namelen || strncmp(namebuf, name, len))
4267                         goto next;
4268
4269                 ret = 0;
4270                 goto out;
4271 next:
4272                 len = sizeof(*di) + name_len + data_len;
4273                 di = (struct btrfs_dir_item *)((char *)di + len);
4274                 cur += len;
4275         }
4276         if (ret == DIR_ITEM_MISMATCH)
4277                 error(
4278                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4279                         root->objectid,
4280                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4281                         ref_key->objectid, ref_key->offset,
4282                         key->type == BTRFS_DIR_ITEM_KEY ?
4283                                 "DIR_ITEM" : "DIR_INDEX",
4284                         key->objectid, key->offset, namelen, name,
4285                         imode_to_type(mode));
4286 out:
4287         btrfs_release_path(&path);
4288         return ret;
4289 }
4290
4291 /*
4292  * Traverse the given INODE_REF and call find_dir_item() to find related
4293  * DIR_ITEM/DIR_INDEX.
4294  *
4295  * @root:       the root of the fs/file tree
4296  * @ref_key:    the key of the INODE_REF
4297  * @refs:       the count of INODE_REF
4298  * @mode:       the st_mode of INODE_ITEM
4299  *
4300  * Return 0 if no error occurred.
4301  */
4302 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4303                            struct extent_buffer *node, int slot, u64 *refs,
4304                            int mode)
4305 {
4306         struct btrfs_key key;
4307         struct btrfs_inode_ref *ref;
4308         char namebuf[BTRFS_NAME_LEN] = {0};
4309         u32 total;
4310         u32 cur = 0;
4311         u32 len;
4312         u32 name_len;
4313         u64 index;
4314         int ret, err = 0;
4315
4316         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4317         total = btrfs_item_size_nr(node, slot);
4318
4319 next:
4320         /* Update inode ref count */
4321         (*refs)++;
4322
4323         index = btrfs_inode_ref_index(node, ref);
4324         name_len = btrfs_inode_ref_name_len(node, ref);
4325         if (cur + sizeof(*ref) + name_len > total ||
4326             name_len > BTRFS_NAME_LEN) {
4327                 warning("root %llu INODE_REF[%llu %llu] name too long",
4328                         root->objectid, ref_key->objectid, ref_key->offset);
4329
4330                 if (total < cur + sizeof(*ref))
4331                         goto out;
4332                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4333         } else {
4334                 len = name_len;
4335         }
4336
4337         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4338
4339         /* Check root dir ref name */
4340         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4341                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4342                       root->objectid, ref_key->objectid, ref_key->offset,
4343                       namebuf);
4344                 err |= ROOT_DIR_ERROR;
4345         }
4346
4347         /* Find related DIR_INDEX */
4348         key.objectid = ref_key->offset;
4349         key.type = BTRFS_DIR_INDEX_KEY;
4350         key.offset = index;
4351         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4352         err |= ret;
4353
4354         /* Find related dir_item */
4355         key.objectid = ref_key->offset;
4356         key.type = BTRFS_DIR_ITEM_KEY;
4357         key.offset = btrfs_name_hash(namebuf, len);
4358         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4359         err |= ret;
4360
4361         len = sizeof(*ref) + name_len;
4362         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4363         cur += len;
4364         if (cur < total)
4365                 goto next;
4366
4367 out:
4368         return err;
4369 }
4370
4371 /*
4372  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4373  * DIR_ITEM/DIR_INDEX.
4374  *
4375  * @root:       the root of the fs/file tree
4376  * @ref_key:    the key of the INODE_EXTREF
4377  * @refs:       the count of INODE_EXTREF
4378  * @mode:       the st_mode of INODE_ITEM
4379  *
4380  * Return 0 if no error occurred.
4381  */
4382 static int check_inode_extref(struct btrfs_root *root,
4383                               struct btrfs_key *ref_key,
4384                               struct extent_buffer *node, int slot, u64 *refs,
4385                               int mode)
4386 {
4387         struct btrfs_key key;
4388         struct btrfs_inode_extref *extref;
4389         char namebuf[BTRFS_NAME_LEN] = {0};
4390         u32 total;
4391         u32 cur = 0;
4392         u32 len;
4393         u32 name_len;
4394         u64 index;
4395         u64 parent;
4396         int ret;
4397         int err = 0;
4398
4399         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4400         total = btrfs_item_size_nr(node, slot);
4401
4402 next:
4403         /* update inode ref count */
4404         (*refs)++;
4405         name_len = btrfs_inode_extref_name_len(node, extref);
4406         index = btrfs_inode_extref_index(node, extref);
4407         parent = btrfs_inode_extref_parent(node, extref);
4408         if (name_len <= BTRFS_NAME_LEN) {
4409                 len = name_len;
4410         } else {
4411                 len = BTRFS_NAME_LEN;
4412                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4413                         root->objectid, ref_key->objectid, ref_key->offset);
4414         }
4415         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4416
4417         /* Check root dir ref name */
4418         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4419                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4420                       root->objectid, ref_key->objectid, ref_key->offset,
4421                       namebuf);
4422                 err |= ROOT_DIR_ERROR;
4423         }
4424
4425         /* find related dir_index */
4426         key.objectid = parent;
4427         key.type = BTRFS_DIR_INDEX_KEY;
4428         key.offset = index;
4429         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4430         err |= ret;
4431
4432         /* find related dir_item */
4433         key.objectid = parent;
4434         key.type = BTRFS_DIR_ITEM_KEY;
4435         key.offset = btrfs_name_hash(namebuf, len);
4436         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4437         err |= ret;
4438
4439         len = sizeof(*extref) + name_len;
4440         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4441         cur += len;
4442
4443         if (cur < total)
4444                 goto next;
4445
4446         return err;
4447 }
4448
4449 /*
4450  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4451  * DIR_ITEM/DIR_INDEX match.
4452  *
4453  * @root:       the root of the fs/file tree
4454  * @key:        the key of the INODE_REF/INODE_EXTREF
4455  * @name:       the name in the INODE_REF/INODE_EXTREF
4456  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4457  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4458  * to (u64)-1
4459  * @ext_ref:    the EXTENDED_IREF feature
4460  *
4461  * Return 0 if no error occurred.
4462  * Return >0 for error bitmap
4463  */
4464 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4465                           char *name, int namelen, u64 index,
4466                           unsigned int ext_ref)
4467 {
4468         struct btrfs_path path;
4469         struct btrfs_inode_ref *ref;
4470         struct btrfs_inode_extref *extref;
4471         struct extent_buffer *node;
4472         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4473         u32 total;
4474         u32 cur = 0;
4475         u32 len;
4476         u32 ref_namelen;
4477         u64 ref_index;
4478         u64 parent;
4479         u64 dir_id;
4480         int slot;
4481         int ret;
4482
4483         btrfs_init_path(&path);
4484         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4485         if (ret) {
4486                 ret = INODE_REF_MISSING;
4487                 goto extref;
4488         }
4489
4490         node = path.nodes[0];
4491         slot = path.slots[0];
4492
4493         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4494         total = btrfs_item_size_nr(node, slot);
4495
4496         /* Iterate all entry of INODE_REF */
4497         while (cur < total) {
4498                 ret = INODE_REF_MISSING;
4499
4500                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4501                 ref_index = btrfs_inode_ref_index(node, ref);
4502                 if (index != (u64)-1 && index != ref_index)
4503                         goto next_ref;
4504
4505                 if (cur + sizeof(*ref) + ref_namelen > total ||
4506                     ref_namelen > BTRFS_NAME_LEN) {
4507                         warning("root %llu INODE %s[%llu %llu] name too long",
4508                                 root->objectid,
4509                                 key->type == BTRFS_INODE_REF_KEY ?
4510                                         "REF" : "EXTREF",
4511                                 key->objectid, key->offset);
4512
4513                         if (cur + sizeof(*ref) > total)
4514                                 break;
4515                         len = min_t(u32, total - cur - sizeof(*ref),
4516                                     BTRFS_NAME_LEN);
4517                 } else {
4518                         len = ref_namelen;
4519                 }
4520
4521                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4522                                    len);
4523
4524                 if (len != namelen || strncmp(ref_namebuf, name, len))
4525                         goto next_ref;
4526
4527                 ret = 0;
4528                 goto out;
4529 next_ref:
4530                 len = sizeof(*ref) + ref_namelen;
4531                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4532                 cur += len;
4533         }
4534
4535 extref:
4536         /* Skip if not support EXTENDED_IREF feature */
4537         if (!ext_ref)
4538                 goto out;
4539
4540         btrfs_release_path(&path);
4541         btrfs_init_path(&path);
4542
4543         dir_id = key->offset;
4544         key->type = BTRFS_INODE_EXTREF_KEY;
4545         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4546
4547         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4548         if (ret) {
4549                 ret = INODE_REF_MISSING;
4550                 goto out;
4551         }
4552
4553         node = path.nodes[0];
4554         slot = path.slots[0];
4555
4556         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4557         cur = 0;
4558         total = btrfs_item_size_nr(node, slot);
4559
4560         /* Iterate all entry of INODE_EXTREF */
4561         while (cur < total) {
4562                 ret = INODE_REF_MISSING;
4563
4564                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4565                 ref_index = btrfs_inode_extref_index(node, extref);
4566                 parent = btrfs_inode_extref_parent(node, extref);
4567                 if (index != (u64)-1 && index != ref_index)
4568                         goto next_extref;
4569
4570                 if (parent != dir_id)
4571                         goto next_extref;
4572
4573                 if (ref_namelen <= BTRFS_NAME_LEN) {
4574                         len = ref_namelen;
4575                 } else {
4576                         len = BTRFS_NAME_LEN;
4577                         warning("root %llu INODE %s[%llu %llu] name too long",
4578                                 root->objectid,
4579                                 key->type == BTRFS_INODE_REF_KEY ?
4580                                         "REF" : "EXTREF",
4581                                 key->objectid, key->offset);
4582                 }
4583                 read_extent_buffer(node, ref_namebuf,
4584                                    (unsigned long)(extref + 1), len);
4585
4586                 if (len != namelen || strncmp(ref_namebuf, name, len))
4587                         goto next_extref;
4588
4589                 ret = 0;
4590                 goto out;
4591
4592 next_extref:
4593                 len = sizeof(*extref) + ref_namelen;
4594                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4595                 cur += len;
4596
4597         }
4598 out:
4599         btrfs_release_path(&path);
4600         return ret;
4601 }
4602
4603 /*
4604  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4605  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4606  *
4607  * @root:       the root of the fs/file tree
4608  * @key:        the key of the DIR_ITEM/DIR_INDEX
4609  * @size:       the st_size of the INODE_ITEM
4610  * @ext_ref:    the EXTENDED_IREF feature
4611  *
4612  * Return 0 if no error occurred.
4613  */
4614 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4615                           struct extent_buffer *node, int slot, u64 *size,
4616                           unsigned int ext_ref)
4617 {
4618         struct btrfs_dir_item *di;
4619         struct btrfs_inode_item *ii;
4620         struct btrfs_path path;
4621         struct btrfs_key location;
4622         char namebuf[BTRFS_NAME_LEN] = {0};
4623         u32 total;
4624         u32 cur = 0;
4625         u32 len;
4626         u32 name_len;
4627         u32 data_len;
4628         u8 filetype;
4629         u32 mode;
4630         u64 index;
4631         int ret;
4632         int err = 0;
4633
4634         /*
4635          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4636          * ignore index check.
4637          */
4638         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4639
4640         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4641         total = btrfs_item_size_nr(node, slot);
4642
4643         while (cur < total) {
4644                 data_len = btrfs_dir_data_len(node, di);
4645                 if (data_len)
4646                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4647                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4648                               "DIR_ITEM" : "DIR_INDEX",
4649                               key->objectid, key->offset, data_len);
4650
4651                 name_len = btrfs_dir_name_len(node, di);
4652                 if (cur + sizeof(*di) + name_len > total ||
4653                     name_len > BTRFS_NAME_LEN) {
4654                         warning("root %llu %s[%llu %llu] name too long",
4655                                 root->objectid,
4656                                 key->type == BTRFS_DIR_ITEM_KEY ?
4657                                 "DIR_ITEM" : "DIR_INDEX",
4658                                 key->objectid, key->offset);
4659
4660                         if (cur + sizeof(*di) > total)
4661                                 break;
4662                         len = min_t(u32, total - cur - sizeof(*di),
4663                                     BTRFS_NAME_LEN);
4664                 } else {
4665                         len = name_len;
4666                 }
4667                 (*size) += name_len;
4668
4669                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4670                 filetype = btrfs_dir_type(node, di);
4671
4672                 btrfs_init_path(&path);
4673                 btrfs_dir_item_key_to_cpu(node, di, &location);
4674
4675                 /* Ignore related ROOT_ITEM check */
4676                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4677                         goto next;
4678
4679                 /* Check relative INODE_ITEM(existence/filetype) */
4680                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4681                 if (ret) {
4682                         err |= INODE_ITEM_MISSING;
4683                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4684                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4685                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4686                               key->offset, location.objectid, name_len,
4687                               namebuf, filetype);
4688                         goto next;
4689                 }
4690
4691                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4692                                     struct btrfs_inode_item);
4693                 mode = btrfs_inode_mode(path.nodes[0], ii);
4694
4695                 if (imode_to_type(mode) != filetype) {
4696                         err |= INODE_ITEM_MISMATCH;
4697                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4698                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4699                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4700                               key->offset, name_len, namebuf, filetype);
4701                 }
4702
4703                 /* Check relative INODE_REF/INODE_EXTREF */
4704                 location.type = BTRFS_INODE_REF_KEY;
4705                 location.offset = key->objectid;
4706                 ret = find_inode_ref(root, &location, namebuf, len,
4707                                        index, ext_ref);
4708                 err |= ret;
4709                 if (ret & INODE_REF_MISSING)
4710                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4711                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4712                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4713                               key->offset, name_len, namebuf, filetype);
4714
4715 next:
4716                 btrfs_release_path(&path);
4717                 len = sizeof(*di) + name_len + data_len;
4718                 di = (struct btrfs_dir_item *)((char *)di + len);
4719                 cur += len;
4720
4721                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4722                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4723                               root->objectid, key->objectid, key->offset);
4724                         break;
4725                 }
4726         }
4727
4728         return err;
4729 }
4730
4731 /*
4732  * Check file extent datasum/hole, update the size of the file extents,
4733  * check and update the last offset of the file extent.
4734  *
4735  * @root:       the root of fs/file tree.
4736  * @fkey:       the key of the file extent.
4737  * @nodatasum:  INODE_NODATASUM feature.
4738  * @size:       the sum of all EXTENT_DATA items size for this inode.
4739  * @end:        the offset of the last extent.
4740  *
4741  * Return 0 if no error occurred.
4742  */
4743 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4744                              struct extent_buffer *node, int slot,
4745                              unsigned int nodatasum, u64 *size, u64 *end)
4746 {
4747         struct btrfs_file_extent_item *fi;
4748         u64 disk_bytenr;
4749         u64 disk_num_bytes;
4750         u64 extent_num_bytes;
4751         u64 extent_offset;
4752         u64 csum_found;         /* In byte size, sectorsize aligned */
4753         u64 search_start;       /* Logical range start we search for csum */
4754         u64 search_len;         /* Logical range len we search for csum */
4755         unsigned int extent_type;
4756         unsigned int is_hole;
4757         int compressed = 0;
4758         int ret;
4759         int err = 0;
4760
4761         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4762
4763         /* Check inline extent */
4764         extent_type = btrfs_file_extent_type(node, fi);
4765         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4766                 struct btrfs_item *e = btrfs_item_nr(slot);
4767                 u32 item_inline_len;
4768
4769                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4770                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4771                 compressed = btrfs_file_extent_compression(node, fi);
4772                 if (extent_num_bytes == 0) {
4773                         error(
4774                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4775                                 root->objectid, fkey->objectid, fkey->offset);
4776                         err |= FILE_EXTENT_ERROR;
4777                 }
4778                 if (!compressed && extent_num_bytes != item_inline_len) {
4779                         error(
4780                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4781                                 root->objectid, fkey->objectid, fkey->offset,
4782                                 extent_num_bytes, item_inline_len);
4783                         err |= FILE_EXTENT_ERROR;
4784                 }
4785                 *end += extent_num_bytes;
4786                 *size += extent_num_bytes;
4787                 return err;
4788         }
4789
4790         /* Check extent type */
4791         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4792                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4793                 err |= FILE_EXTENT_ERROR;
4794                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4795                       root->objectid, fkey->objectid, fkey->offset);
4796                 return err;
4797         }
4798
4799         /* Check REG_EXTENT/PREALLOC_EXTENT */
4800         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4801         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4802         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4803         extent_offset = btrfs_file_extent_offset(node, fi);
4804         compressed = btrfs_file_extent_compression(node, fi);
4805         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4806
4807         /*
4808          * Check EXTENT_DATA csum
4809          *
4810          * For plain (uncompressed) extent, we should only check the range
4811          * we're referring to, as it's possible that part of prealloc extent
4812          * has been written, and has csum:
4813          *
4814          * |<--- Original large preallocated extent A ---->|
4815          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4816          *      No csum                         Has csum
4817          *
4818          * For compressed extent, we should check the whole range.
4819          */
4820         if (!compressed) {
4821                 search_start = disk_bytenr + extent_offset;
4822                 search_len = extent_num_bytes;
4823         } else {
4824                 search_start = disk_bytenr;
4825                 search_len = disk_num_bytes;
4826         }
4827         ret = count_csum_range(root, search_start, search_len, &csum_found);
4828         if (csum_found > 0 && nodatasum) {
4829                 err |= ODD_CSUM_ITEM;
4830                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4831                       root->objectid, fkey->objectid, fkey->offset);
4832         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4833                    !is_hole && (ret < 0 || csum_found < search_len)) {
4834                 err |= CSUM_ITEM_MISSING;
4835                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4836                       root->objectid, fkey->objectid, fkey->offset,
4837                       csum_found, search_len);
4838         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4839                 err |= ODD_CSUM_ITEM;
4840                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4841                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4842         }
4843
4844         /* Check EXTENT_DATA hole */
4845         if (!no_holes && *end != fkey->offset) {
4846                 err |= FILE_EXTENT_ERROR;
4847                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4848                       root->objectid, fkey->objectid, fkey->offset);
4849         }
4850
4851         *end += extent_num_bytes;
4852         if (!is_hole)
4853                 *size += extent_num_bytes;
4854
4855         return err;
4856 }
4857
4858 /*
4859  * Check INODE_ITEM and related ITEMs (the same inode number)
4860  * 1. check link count
4861  * 2. check inode ref/extref
4862  * 3. check dir item/index
4863  *
4864  * @ext_ref:    the EXTENDED_IREF feature
4865  *
4866  * Return 0 if no error occurred.
4867  * Return >0 for error or hit the traversal is done(by error bitmap)
4868  */
4869 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4870                             unsigned int ext_ref)
4871 {
4872         struct extent_buffer *node;
4873         struct btrfs_inode_item *ii;
4874         struct btrfs_key key;
4875         u64 inode_id;
4876         u32 mode;
4877         u64 nlink;
4878         u64 nbytes;
4879         u64 isize;
4880         u64 size = 0;
4881         u64 refs = 0;
4882         u64 extent_end = 0;
4883         u64 extent_size = 0;
4884         unsigned int dir;
4885         unsigned int nodatasum;
4886         int slot;
4887         int ret;
4888         int err = 0;
4889
4890         node = path->nodes[0];
4891         slot = path->slots[0];
4892
4893         btrfs_item_key_to_cpu(node, &key, slot);
4894         inode_id = key.objectid;
4895
4896         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4897                 ret = btrfs_next_item(root, path);
4898                 if (ret > 0)
4899                         err |= LAST_ITEM;
4900                 return err;
4901         }
4902
4903         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4904         isize = btrfs_inode_size(node, ii);
4905         nbytes = btrfs_inode_nbytes(node, ii);
4906         mode = btrfs_inode_mode(node, ii);
4907         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4908         nlink = btrfs_inode_nlink(node, ii);
4909         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4910
4911         while (1) {
4912                 ret = btrfs_next_item(root, path);
4913                 if (ret < 0) {
4914                         /* out will fill 'err' rusing current statistics */
4915                         goto out;
4916                 } else if (ret > 0) {
4917                         err |= LAST_ITEM;
4918                         goto out;
4919                 }
4920
4921                 node = path->nodes[0];
4922                 slot = path->slots[0];
4923                 btrfs_item_key_to_cpu(node, &key, slot);
4924                 if (key.objectid != inode_id)
4925                         goto out;
4926
4927                 switch (key.type) {
4928                 case BTRFS_INODE_REF_KEY:
4929                         ret = check_inode_ref(root, &key, node, slot, &refs,
4930                                               mode);
4931                         err |= ret;
4932                         break;
4933                 case BTRFS_INODE_EXTREF_KEY:
4934                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4935                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4936                                         root->objectid, key.objectid,
4937                                         key.offset);
4938                         ret = check_inode_extref(root, &key, node, slot, &refs,
4939                                                  mode);
4940                         err |= ret;
4941                         break;
4942                 case BTRFS_DIR_ITEM_KEY:
4943                 case BTRFS_DIR_INDEX_KEY:
4944                         if (!dir) {
4945                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4946                                         root->objectid, inode_id,
4947                                         imode_to_type(mode), key.objectid,
4948                                         key.offset);
4949                         }
4950                         ret = check_dir_item(root, &key, node, slot, &size,
4951                                              ext_ref);
4952                         err |= ret;
4953                         break;
4954                 case BTRFS_EXTENT_DATA_KEY:
4955                         if (dir) {
4956                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4957                                         root->objectid, inode_id, key.objectid,
4958                                         key.offset);
4959                         }
4960                         ret = check_file_extent(root, &key, node, slot,
4961                                                 nodatasum, &extent_size,
4962                                                 &extent_end);
4963                         err |= ret;
4964                         break;
4965                 case BTRFS_XATTR_ITEM_KEY:
4966                         break;
4967                 default:
4968                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4969                               key.objectid, key.type, key.offset);
4970                 }
4971         }
4972
4973 out:
4974         /* verify INODE_ITEM nlink/isize/nbytes */
4975         if (dir) {
4976                 if (nlink != 1) {
4977                         err |= LINK_COUNT_ERROR;
4978                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4979                               root->objectid, inode_id, nlink);
4980                 }
4981
4982                 /*
4983                  * Just a warning, as dir inode nbytes is just an
4984                  * instructive value.
4985                  */
4986                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4987                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4988                                 root->objectid, inode_id,
4989                                 root->fs_info->nodesize);
4990                 }
4991
4992                 if (isize != size) {
4993                         err |= ISIZE_ERROR;
4994                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4995                               root->objectid, inode_id, isize, size);
4996                 }
4997         } else {
4998                 if (nlink != refs) {
4999                         err |= LINK_COUNT_ERROR;
5000                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5001                               root->objectid, inode_id, nlink, refs);
5002                 } else if (!nlink) {
5003                         err |= ORPHAN_ITEM;
5004                 }
5005
5006                 if (!nbytes && !no_holes && extent_end < isize) {
5007                         err |= NBYTES_ERROR;
5008                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5009                               root->objectid, inode_id, isize);
5010                 }
5011
5012                 if (nbytes != extent_size) {
5013                         err |= NBYTES_ERROR;
5014                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5015                               root->objectid, inode_id, nbytes, extent_size);
5016                 }
5017         }
5018
5019         return err;
5020 }
5021
5022 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5023 {
5024         struct btrfs_path path;
5025         struct btrfs_key key;
5026         int err = 0;
5027         int ret;
5028
5029         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5030         key.type = BTRFS_INODE_ITEM_KEY;
5031         key.offset = 0;
5032
5033         /* For root being dropped, we don't need to check first inode */
5034         if (btrfs_root_refs(&root->root_item) == 0 &&
5035             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5036             key.objectid)
5037                 return 0;
5038
5039         btrfs_init_path(&path);
5040
5041         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5042         if (ret < 0)
5043                 goto out;
5044         if (ret > 0) {
5045                 ret = 0;
5046                 err |= INODE_ITEM_MISSING;
5047                 error("first inode item of root %llu is missing",
5048                       root->objectid);
5049         }
5050
5051         err |= check_inode_item(root, &path, ext_ref);
5052         err &= ~LAST_ITEM;
5053         if (err && !ret)
5054                 ret = -EIO;
5055 out:
5056         btrfs_release_path(&path);
5057         return ret;
5058 }
5059
5060 /*
5061  * Iterate all item on the tree and call check_inode_item() to check.
5062  *
5063  * @root:       the root of the tree to be checked.
5064  * @ext_ref:    the EXTENDED_IREF feature
5065  *
5066  * Return 0 if no error found.
5067  * Return <0 for error.
5068  */
5069 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5070 {
5071         struct btrfs_path path;
5072         struct node_refs nrefs;
5073         struct btrfs_root_item *root_item = &root->root_item;
5074         int ret;
5075         int level;
5076         int err = 0;
5077
5078         /*
5079          * We need to manually check the first inode item(256)
5080          * As the following traversal function will only start from
5081          * the first inode item in the leaf, if inode item(256) is missing
5082          * we will just skip it forever.
5083          */
5084         ret = check_fs_first_inode(root, ext_ref);
5085         if (ret < 0)
5086                 return ret;
5087
5088         memset(&nrefs, 0, sizeof(nrefs));
5089         level = btrfs_header_level(root->node);
5090         btrfs_init_path(&path);
5091
5092         if (btrfs_root_refs(root_item) > 0 ||
5093             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5094                 path.nodes[level] = root->node;
5095                 path.slots[level] = 0;
5096                 extent_buffer_get(root->node);
5097         } else {
5098                 struct btrfs_key key;
5099
5100                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5101                 level = root_item->drop_level;
5102                 path.lowest_level = level;
5103                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5104                 if (ret < 0)
5105                         goto out;
5106                 ret = 0;
5107         }
5108
5109         while (1) {
5110                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5111                 err |= !!ret;
5112
5113                 /* if ret is negative, walk shall stop */
5114                 if (ret < 0) {
5115                         ret = err;
5116                         break;
5117                 }
5118
5119                 ret = walk_up_tree_v2(root, &path, &level);
5120                 if (ret != 0) {
5121                         /* Normal exit, reset ret to err */
5122                         ret = err;
5123                         break;
5124                 }
5125         }
5126
5127 out:
5128         btrfs_release_path(&path);
5129         return ret;
5130 }
5131
5132 /*
5133  * Find the relative ref for root_ref and root_backref.
5134  *
5135  * @root:       the root of the root tree.
5136  * @ref_key:    the key of the root ref.
5137  *
5138  * Return 0 if no error occurred.
5139  */
5140 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5141                           struct extent_buffer *node, int slot)
5142 {
5143         struct btrfs_path path;
5144         struct btrfs_key key;
5145         struct btrfs_root_ref *ref;
5146         struct btrfs_root_ref *backref;
5147         char ref_name[BTRFS_NAME_LEN] = {0};
5148         char backref_name[BTRFS_NAME_LEN] = {0};
5149         u64 ref_dirid;
5150         u64 ref_seq;
5151         u32 ref_namelen;
5152         u64 backref_dirid;
5153         u64 backref_seq;
5154         u32 backref_namelen;
5155         u32 len;
5156         int ret;
5157         int err = 0;
5158
5159         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5160         ref_dirid = btrfs_root_ref_dirid(node, ref);
5161         ref_seq = btrfs_root_ref_sequence(node, ref);
5162         ref_namelen = btrfs_root_ref_name_len(node, ref);
5163
5164         if (ref_namelen <= BTRFS_NAME_LEN) {
5165                 len = ref_namelen;
5166         } else {
5167                 len = BTRFS_NAME_LEN;
5168                 warning("%s[%llu %llu] ref_name too long",
5169                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5170                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5171                         ref_key->offset);
5172         }
5173         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5174
5175         /* Find relative root_ref */
5176         key.objectid = ref_key->offset;
5177         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5178         key.offset = ref_key->objectid;
5179
5180         btrfs_init_path(&path);
5181         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5182         if (ret) {
5183                 err |= ROOT_REF_MISSING;
5184                 error("%s[%llu %llu] couldn't find relative ref",
5185                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5186                       "ROOT_REF" : "ROOT_BACKREF",
5187                       ref_key->objectid, ref_key->offset);
5188                 goto out;
5189         }
5190
5191         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5192                                  struct btrfs_root_ref);
5193         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5194         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5195         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5196
5197         if (backref_namelen <= BTRFS_NAME_LEN) {
5198                 len = backref_namelen;
5199         } else {
5200                 len = BTRFS_NAME_LEN;
5201                 warning("%s[%llu %llu] ref_name too long",
5202                         key.type == BTRFS_ROOT_REF_KEY ?
5203                         "ROOT_REF" : "ROOT_BACKREF",
5204                         key.objectid, key.offset);
5205         }
5206         read_extent_buffer(path.nodes[0], backref_name,
5207                            (unsigned long)(backref + 1), len);
5208
5209         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5210             ref_namelen != backref_namelen ||
5211             strncmp(ref_name, backref_name, len)) {
5212                 err |= ROOT_REF_MISMATCH;
5213                 error("%s[%llu %llu] mismatch relative ref",
5214                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5215                       "ROOT_REF" : "ROOT_BACKREF",
5216                       ref_key->objectid, ref_key->offset);
5217         }
5218 out:
5219         btrfs_release_path(&path);
5220         return err;
5221 }
5222
5223 /*
5224  * Check all fs/file tree in low_memory mode.
5225  *
5226  * 1. for fs tree root item, call check_fs_root_v2()
5227  * 2. for fs tree root ref/backref, call check_root_ref()
5228  *
5229  * Return 0 if no error occurred.
5230  */
5231 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5232 {
5233         struct btrfs_root *tree_root = fs_info->tree_root;
5234         struct btrfs_root *cur_root = NULL;
5235         struct btrfs_path path;
5236         struct btrfs_key key;
5237         struct extent_buffer *node;
5238         unsigned int ext_ref;
5239         int slot;
5240         int ret;
5241         int err = 0;
5242
5243         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5244
5245         btrfs_init_path(&path);
5246         key.objectid = BTRFS_FS_TREE_OBJECTID;
5247         key.offset = 0;
5248         key.type = BTRFS_ROOT_ITEM_KEY;
5249
5250         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5251         if (ret < 0) {
5252                 err = ret;
5253                 goto out;
5254         } else if (ret > 0) {
5255                 err = -ENOENT;
5256                 goto out;
5257         }
5258
5259         while (1) {
5260                 node = path.nodes[0];
5261                 slot = path.slots[0];
5262                 btrfs_item_key_to_cpu(node, &key, slot);
5263                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5264                         goto out;
5265                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5266                     fs_root_objectid(key.objectid)) {
5267                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5268                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5269                                                                        &key);
5270                         } else {
5271                                 key.offset = (u64)-1;
5272                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5273                         }
5274
5275                         if (IS_ERR(cur_root)) {
5276                                 error("Fail to read fs/subvol tree: %lld",
5277                                       key.objectid);
5278                                 err = -EIO;
5279                                 goto next;
5280                         }
5281
5282                         ret = check_fs_root_v2(cur_root, ext_ref);
5283                         err |= ret;
5284
5285                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5286                                 btrfs_free_fs_root(cur_root);
5287                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5288                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5289                         ret = check_root_ref(tree_root, &key, node, slot);
5290                         err |= ret;
5291                 }
5292 next:
5293                 ret = btrfs_next_item(tree_root, &path);
5294                 if (ret > 0)
5295                         goto out;
5296                 if (ret < 0) {
5297                         err = ret;
5298                         goto out;
5299                 }
5300         }
5301
5302 out:
5303         btrfs_release_path(&path);
5304         return err;
5305 }
5306
5307 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5308 {
5309         struct list_head *cur = rec->backrefs.next;
5310         struct extent_backref *back;
5311         struct tree_backref *tback;
5312         struct data_backref *dback;
5313         u64 found = 0;
5314         int err = 0;
5315
5316         while(cur != &rec->backrefs) {
5317                 back = to_extent_backref(cur);
5318                 cur = cur->next;
5319                 if (!back->found_extent_tree) {
5320                         err = 1;
5321                         if (!print_errs)
5322                                 goto out;
5323                         if (back->is_data) {
5324                                 dback = to_data_backref(back);
5325                                 fprintf(stderr, "Backref %llu %s %llu"
5326                                         " owner %llu offset %llu num_refs %lu"
5327                                         " not found in extent tree\n",
5328                                         (unsigned long long)rec->start,
5329                                         back->full_backref ?
5330                                         "parent" : "root",
5331                                         back->full_backref ?
5332                                         (unsigned long long)dback->parent:
5333                                         (unsigned long long)dback->root,
5334                                         (unsigned long long)dback->owner,
5335                                         (unsigned long long)dback->offset,
5336                                         (unsigned long)dback->num_refs);
5337                         } else {
5338                                 tback = to_tree_backref(back);
5339                                 fprintf(stderr, "Backref %llu parent %llu"
5340                                         " root %llu not found in extent tree\n",
5341                                         (unsigned long long)rec->start,
5342                                         (unsigned long long)tback->parent,
5343                                         (unsigned long long)tback->root);
5344                         }
5345                 }
5346                 if (!back->is_data && !back->found_ref) {
5347                         err = 1;
5348                         if (!print_errs)
5349                                 goto out;
5350                         tback = to_tree_backref(back);
5351                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5352                                 (unsigned long long)rec->start,
5353                                 back->full_backref ? "parent" : "root",
5354                                 back->full_backref ?
5355                                 (unsigned long long)tback->parent :
5356                                 (unsigned long long)tback->root, back);
5357                 }
5358                 if (back->is_data) {
5359                         dback = to_data_backref(back);
5360                         if (dback->found_ref != dback->num_refs) {
5361                                 err = 1;
5362                                 if (!print_errs)
5363                                         goto out;
5364                                 fprintf(stderr, "Incorrect local backref count"
5365                                         " on %llu %s %llu owner %llu"
5366                                         " offset %llu found %u wanted %u back %p\n",
5367                                         (unsigned long long)rec->start,
5368                                         back->full_backref ?
5369                                         "parent" : "root",
5370                                         back->full_backref ?
5371                                         (unsigned long long)dback->parent:
5372                                         (unsigned long long)dback->root,
5373                                         (unsigned long long)dback->owner,
5374                                         (unsigned long long)dback->offset,
5375                                         dback->found_ref, dback->num_refs, back);
5376                         }
5377                         if (dback->disk_bytenr != rec->start) {
5378                                 err = 1;
5379                                 if (!print_errs)
5380                                         goto out;
5381                                 fprintf(stderr, "Backref disk bytenr does not"
5382                                         " match extent record, bytenr=%llu, "
5383                                         "ref bytenr=%llu\n",
5384                                         (unsigned long long)rec->start,
5385                                         (unsigned long long)dback->disk_bytenr);
5386                         }
5387
5388                         if (dback->bytes != rec->nr) {
5389                                 err = 1;
5390                                 if (!print_errs)
5391                                         goto out;
5392                                 fprintf(stderr, "Backref bytes do not match "
5393                                         "extent backref, bytenr=%llu, ref "
5394                                         "bytes=%llu, backref bytes=%llu\n",
5395                                         (unsigned long long)rec->start,
5396                                         (unsigned long long)rec->nr,
5397                                         (unsigned long long)dback->bytes);
5398                         }
5399                 }
5400                 if (!back->is_data) {
5401                         found += 1;
5402                 } else {
5403                         dback = to_data_backref(back);
5404                         found += dback->found_ref;
5405                 }
5406         }
5407         if (found != rec->refs) {
5408                 err = 1;
5409                 if (!print_errs)
5410                         goto out;
5411                 fprintf(stderr, "Incorrect global backref count "
5412                         "on %llu found %llu wanted %llu\n",
5413                         (unsigned long long)rec->start,
5414                         (unsigned long long)found,
5415                         (unsigned long long)rec->refs);
5416         }
5417 out:
5418         return err;
5419 }
5420
5421 static int free_all_extent_backrefs(struct extent_record *rec)
5422 {
5423         struct extent_backref *back;
5424         struct list_head *cur;
5425         while (!list_empty(&rec->backrefs)) {
5426                 cur = rec->backrefs.next;
5427                 back = to_extent_backref(cur);
5428                 list_del(cur);
5429                 free(back);
5430         }
5431         return 0;
5432 }
5433
5434 static void free_extent_record_cache(struct cache_tree *extent_cache)
5435 {
5436         struct cache_extent *cache;
5437         struct extent_record *rec;
5438
5439         while (1) {
5440                 cache = first_cache_extent(extent_cache);
5441                 if (!cache)
5442                         break;
5443                 rec = container_of(cache, struct extent_record, cache);
5444                 remove_cache_extent(extent_cache, cache);
5445                 free_all_extent_backrefs(rec);
5446                 free(rec);
5447         }
5448 }
5449
5450 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5451                                  struct extent_record *rec)
5452 {
5453         if (rec->content_checked && rec->owner_ref_checked &&
5454             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5455             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5456             !rec->bad_full_backref && !rec->crossing_stripes &&
5457             !rec->wrong_chunk_type) {
5458                 remove_cache_extent(extent_cache, &rec->cache);
5459                 free_all_extent_backrefs(rec);
5460                 list_del_init(&rec->list);
5461                 free(rec);
5462         }
5463         return 0;
5464 }
5465
5466 static int check_owner_ref(struct btrfs_root *root,
5467                             struct extent_record *rec,
5468                             struct extent_buffer *buf)
5469 {
5470         struct extent_backref *node;
5471         struct tree_backref *back;
5472         struct btrfs_root *ref_root;
5473         struct btrfs_key key;
5474         struct btrfs_path path;
5475         struct extent_buffer *parent;
5476         int level;
5477         int found = 0;
5478         int ret;
5479
5480         list_for_each_entry(node, &rec->backrefs, list) {
5481                 if (node->is_data)
5482                         continue;
5483                 if (!node->found_ref)
5484                         continue;
5485                 if (node->full_backref)
5486                         continue;
5487                 back = to_tree_backref(node);
5488                 if (btrfs_header_owner(buf) == back->root)
5489                         return 0;
5490         }
5491         BUG_ON(rec->is_root);
5492
5493         /* try to find the block by search corresponding fs tree */
5494         key.objectid = btrfs_header_owner(buf);
5495         key.type = BTRFS_ROOT_ITEM_KEY;
5496         key.offset = (u64)-1;
5497
5498         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5499         if (IS_ERR(ref_root))
5500                 return 1;
5501
5502         level = btrfs_header_level(buf);
5503         if (level == 0)
5504                 btrfs_item_key_to_cpu(buf, &key, 0);
5505         else
5506                 btrfs_node_key_to_cpu(buf, &key, 0);
5507
5508         btrfs_init_path(&path);
5509         path.lowest_level = level + 1;
5510         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5511         if (ret < 0)
5512                 return 0;
5513
5514         parent = path.nodes[level + 1];
5515         if (parent && buf->start == btrfs_node_blockptr(parent,
5516                                                         path.slots[level + 1]))
5517                 found = 1;
5518
5519         btrfs_release_path(&path);
5520         return found ? 0 : 1;
5521 }
5522
5523 static int is_extent_tree_record(struct extent_record *rec)
5524 {
5525         struct list_head *cur = rec->backrefs.next;
5526         struct extent_backref *node;
5527         struct tree_backref *back;
5528         int is_extent = 0;
5529
5530         while(cur != &rec->backrefs) {
5531                 node = to_extent_backref(cur);
5532                 cur = cur->next;
5533                 if (node->is_data)
5534                         return 0;
5535                 back = to_tree_backref(node);
5536                 if (node->full_backref)
5537                         return 0;
5538                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5539                         is_extent = 1;
5540         }
5541         return is_extent;
5542 }
5543
5544
5545 static int record_bad_block_io(struct btrfs_fs_info *info,
5546                                struct cache_tree *extent_cache,
5547                                u64 start, u64 len)
5548 {
5549         struct extent_record *rec;
5550         struct cache_extent *cache;
5551         struct btrfs_key key;
5552
5553         cache = lookup_cache_extent(extent_cache, start, len);
5554         if (!cache)
5555                 return 0;
5556
5557         rec = container_of(cache, struct extent_record, cache);
5558         if (!is_extent_tree_record(rec))
5559                 return 0;
5560
5561         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5562         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5563 }
5564
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (header level != 0) the two key pointers are swapped in place.
 * For a leaf the item payloads, the item offsets/sizes and the item keys are
 * all exchanged.
 *
 * @root: tree the block belongs to, passed on to the key fixup helpers
 * @path: path positioned on @buf; in the leaf case path->slots[0] is
 *        overwritten and left at @slot + 1
 * @buf:  block to modify
 * @slot: index of the first of the two adjacent entries
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* read both key pointers, then write them back crosswise */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        /* the first key changed, propagate it to the parents */
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                /* snapshot keys, offsets and sizes before anything moves */
                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each write copies with the *other* item's
                 * size, so when item1_size != item2_size one of these reads
                 * past the end of its malloc'ed source buffer. This looks
                 * correct only for equally sized items - confirm.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* exchange the offset/size stored in the two item headers */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /*
                 * The key setter takes its slot from path->slots[0], so the
                 * path is pointed at each slot in turn.
                 */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5632
5633 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5634 {
5635         struct extent_buffer *buf;
5636         struct btrfs_key k1, k2;
5637         int i;
5638         int level = path->lowest_level;
5639         int ret = -EIO;
5640
5641         buf = path->nodes[level];
5642         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5643                 if (level) {
5644                         btrfs_node_key_to_cpu(buf, &k1, i);
5645                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5646                 } else {
5647                         btrfs_item_key_to_cpu(buf, &k1, i);
5648                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5649                 }
5650                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5651                         continue;
5652                 ret = swap_values(root, path, buf, i);
5653                 if (ret)
5654                         break;
5655                 btrfs_mark_buffer_dirty(buf);
5656                 i = 0;
5657         }
5658         return ret;
5659 }
5660
5661 static int delete_bogus_item(struct btrfs_root *root,
5662                              struct btrfs_path *path,
5663                              struct extent_buffer *buf, int slot)
5664 {
5665         struct btrfs_key key;
5666         int nritems = btrfs_header_nritems(buf);
5667
5668         btrfs_item_key_to_cpu(buf, &key, slot);
5669
5670         /* These are all the keys we can deal with missing. */
5671         if (key.type != BTRFS_DIR_INDEX_KEY &&
5672             key.type != BTRFS_EXTENT_ITEM_KEY &&
5673             key.type != BTRFS_METADATA_ITEM_KEY &&
5674             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5675             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5676                 return -1;
5677
5678         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5679                (unsigned long long)key.objectid, key.type,
5680                (unsigned long long)key.offset, slot, buf->start);
5681         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5682                               btrfs_item_nr_offset(slot + 1),
5683                               sizeof(struct btrfs_item) *
5684                               (nritems - slot - 1));
5685         btrfs_set_header_nritems(buf, nritems - 1);
5686         if (slot == 0) {
5687                 struct btrfs_disk_key disk_key;
5688
5689                 btrfs_item_key(buf, &disk_key, 0);
5690                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5691         }
5692         btrfs_mark_buffer_dirty(buf);
5693         return 0;
5694 }
5695
/*
 * Repair the item data offsets of the leaf at path->nodes[0].
 *
 * Item 0 is expected to end exactly at BTRFS_LEAF_DATA_SIZE(root) and every
 * later item exactly where its predecessor begins. An item that ends too
 * early is moved up by the size of the gap; an item that ends too late is
 * handed to delete_bogus_item() and, if that succeeds, the scan starts over.
 *
 * @root: tree the leaf belongs to
 * @path: path to the leaf; path->lowest_level must be 0
 *
 * Returns 0. Any failure sets ret to -EIO, which then trips the BUG_ON()
 * at the end instead of being returned.
 */
static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
        struct extent_buffer *buf;
        int i;
        int ret = 0;

        /* We should only get this for leaves */
        BUG_ON(path->lowest_level);
        buf = path->nodes[0];
again:
        for (i = 0; i < btrfs_header_nritems(buf); i++) {
                unsigned int shift = 0, offset;

                /* first item: its end is compared with the leaf data size */
                if (i == 0 && btrfs_item_end_nr(buf, i) !=
                    BTRFS_LEAF_DATA_SIZE(root)) {
                        if (btrfs_item_end_nr(buf, i) >
                            BTRFS_LEAF_DATA_SIZE(root)) {
                                /* item sticks out of the leaf: drop or fail */
                                ret = delete_bogus_item(root, path, buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "item is off the end of the "
                                        "leaf, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = BTRFS_LEAF_DATA_SIZE(root) -
                                btrfs_item_end_nr(buf, i);
                /* later items: end is compared with the previous offset */
                } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
                           btrfs_item_offset_nr(buf, i - 1)) {
                        if (btrfs_item_end_nr(buf, i) >
                            btrfs_item_offset_nr(buf, i - 1)) {
                                /* item overlaps its predecessor: drop or fail */
                                ret = delete_bogus_item(root, path, buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "items overlap, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = btrfs_item_offset_nr(buf, i - 1) -
                                btrfs_item_end_nr(buf, i);
                }
                if (!shift)
                        continue;

                /* close the gap: move the payload up by shift bytes */
                printf("Shifting item nr %d by %u bytes in block %llu\n",
                       i, shift, (unsigned long long)buf->start);
                offset = btrfs_item_offset_nr(buf, i);
                memmove_extent_buffer(buf,
                                      btrfs_leaf_data(buf) + offset + shift,
                                      btrfs_leaf_data(buf) + offset,
                                      btrfs_item_size_nr(buf, i));
                btrfs_set_item_offset(buf, btrfs_item_nr(i),
                                      offset + shift);
                btrfs_mark_buffer_dirty(buf);
        }

        /*
         * We may have moved things, in which case we want to exit so we don't
         * write those changes out.  Once we have proper abort functionality in
         * progs this can be changed to something nicer.
         */
        BUG_ON(ret);
        return ret;
}
5760
5761 /*
5762  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5763  * then just return -EIO.
5764  */
5765 static int try_to_fix_bad_block(struct btrfs_root *root,
5766                                 struct extent_buffer *buf,
5767                                 enum btrfs_tree_block_status status)
5768 {
5769         struct btrfs_trans_handle *trans;
5770         struct ulist *roots;
5771         struct ulist_node *node;
5772         struct btrfs_root *search_root;
5773         struct btrfs_path path;
5774         struct ulist_iterator iter;
5775         struct btrfs_key root_key, key;
5776         int ret;
5777
5778         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5779             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780                 return -EIO;
5781
5782         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5783         if (ret)
5784                 return -EIO;
5785
5786         btrfs_init_path(&path);
5787         ULIST_ITER_INIT(&iter);
5788         while ((node = ulist_next(roots, &iter))) {
5789                 root_key.objectid = node->val;
5790                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5791                 root_key.offset = (u64)-1;
5792
5793                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5794                 if (IS_ERR(root)) {
5795                         ret = -EIO;
5796                         break;
5797                 }
5798
5799
5800                 trans = btrfs_start_transaction(search_root, 0);
5801                 if (IS_ERR(trans)) {
5802                         ret = PTR_ERR(trans);
5803                         break;
5804                 }
5805
5806                 path.lowest_level = btrfs_header_level(buf);
5807                 path.skip_check_block = 1;
5808                 if (path.lowest_level)
5809                         btrfs_node_key_to_cpu(buf, &key, 0);
5810                 else
5811                         btrfs_item_key_to_cpu(buf, &key, 0);
5812                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5813                 if (ret) {
5814                         ret = -EIO;
5815                         btrfs_commit_transaction(trans, search_root);
5816                         break;
5817                 }
5818                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5819                         ret = fix_key_order(search_root, &path);
5820                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5821                         ret = fix_item_offset(search_root, &path);
5822                 if (ret) {
5823                         btrfs_commit_transaction(trans, search_root);
5824                         break;
5825                 }
5826                 btrfs_release_path(&path);
5827                 btrfs_commit_transaction(trans, search_root);
5828         }
5829         ulist_free(roots);
5830         btrfs_release_path(&path);
5831         return ret;
5832 }
5833
5834 static int check_block(struct btrfs_root *root,
5835                        struct cache_tree *extent_cache,
5836                        struct extent_buffer *buf, u64 flags)
5837 {
5838         struct extent_record *rec;
5839         struct cache_extent *cache;
5840         struct btrfs_key key;
5841         enum btrfs_tree_block_status status;
5842         int ret = 0;
5843         int level;
5844
5845         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5846         if (!cache)
5847                 return 1;
5848         rec = container_of(cache, struct extent_record, cache);
5849         rec->generation = btrfs_header_generation(buf);
5850
5851         level = btrfs_header_level(buf);
5852         if (btrfs_header_nritems(buf) > 0) {
5853
5854                 if (level == 0)
5855                         btrfs_item_key_to_cpu(buf, &key, 0);
5856                 else
5857                         btrfs_node_key_to_cpu(buf, &key, 0);
5858
5859                 rec->info_objectid = key.objectid;
5860         }
5861         rec->info_level = level;
5862
5863         if (btrfs_is_leaf(buf))
5864                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5865         else
5866                 status = btrfs_check_node(root, &rec->parent_key, buf);
5867
5868         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5869                 if (repair)
5870                         status = try_to_fix_bad_block(root, buf, status);
5871                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5872                         ret = -EIO;
5873                         fprintf(stderr, "bad block %llu\n",
5874                                 (unsigned long long)buf->start);
5875                 } else {
5876                         /*
5877                          * Signal to callers we need to start the scan over
5878                          * again since we'll have cowed blocks.
5879                          */
5880                         ret = -EAGAIN;
5881                 }
5882         } else {
5883                 rec->content_checked = 1;
5884                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5885                         rec->owner_ref_checked = 1;
5886                 else {
5887                         ret = check_owner_ref(root, rec, buf);
5888                         if (!ret)
5889                                 rec->owner_ref_checked = 1;
5890                 }
5891         }
5892         if (!ret)
5893                 maybe_free_extent_rec(extent_cache, rec);
5894         return ret;
5895 }
5896
5897 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5898                                                 u64 parent, u64 root)
5899 {
5900         struct list_head *cur = rec->backrefs.next;
5901         struct extent_backref *node;
5902         struct tree_backref *back;
5903
5904         while(cur != &rec->backrefs) {
5905                 node = to_extent_backref(cur);
5906                 cur = cur->next;
5907                 if (node->is_data)
5908                         continue;
5909                 back = to_tree_backref(node);
5910                 if (parent > 0) {
5911                         if (!node->full_backref)
5912                                 continue;
5913                         if (parent == back->parent)
5914                                 return back;
5915                 } else {
5916                         if (node->full_backref)
5917                                 continue;
5918                         if (back->root == root)
5919                                 return back;
5920                 }
5921         }
5922         return NULL;
5923 }
5924
5925 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5926                                                 u64 parent, u64 root)
5927 {
5928         struct tree_backref *ref = malloc(sizeof(*ref));
5929
5930         if (!ref)
5931                 return NULL;
5932         memset(&ref->node, 0, sizeof(ref->node));
5933         if (parent > 0) {
5934                 ref->parent = parent;
5935                 ref->node.full_backref = 1;
5936         } else {
5937                 ref->root = root;
5938                 ref->node.full_backref = 0;
5939         }
5940         list_add_tail(&ref->node.list, &rec->backrefs);
5941
5942         return ref;
5943 }
5944
5945 static struct data_backref *find_data_backref(struct extent_record *rec,
5946                                                 u64 parent, u64 root,
5947                                                 u64 owner, u64 offset,
5948                                                 int found_ref,
5949                                                 u64 disk_bytenr, u64 bytes)
5950 {
5951         struct list_head *cur = rec->backrefs.next;
5952         struct extent_backref *node;
5953         struct data_backref *back;
5954
5955         while(cur != &rec->backrefs) {
5956                 node = to_extent_backref(cur);
5957                 cur = cur->next;
5958                 if (!node->is_data)
5959                         continue;
5960                 back = to_data_backref(node);
5961                 if (parent > 0) {
5962                         if (!node->full_backref)
5963                                 continue;
5964                         if (parent == back->parent)
5965                                 return back;
5966                 } else {
5967                         if (node->full_backref)
5968                                 continue;
5969                         if (back->root == root && back->owner == owner &&
5970                             back->offset == offset) {
5971                                 if (found_ref && node->found_ref &&
5972                                     (back->bytes != bytes ||
5973                                     back->disk_bytenr != disk_bytenr))
5974                                         continue;
5975                                 return back;
5976                         }
5977                 }
5978         }
5979         return NULL;
5980 }
5981
5982 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5983                                                 u64 parent, u64 root,
5984                                                 u64 owner, u64 offset,
5985                                                 u64 max_size)
5986 {
5987         struct data_backref *ref = malloc(sizeof(*ref));
5988
5989         if (!ref)
5990                 return NULL;
5991         memset(&ref->node, 0, sizeof(ref->node));
5992         ref->node.is_data = 1;
5993
5994         if (parent > 0) {
5995                 ref->parent = parent;
5996                 ref->owner = 0;
5997                 ref->offset = 0;
5998                 ref->node.full_backref = 1;
5999         } else {
6000                 ref->root = root;
6001                 ref->owner = owner;
6002                 ref->offset = offset;
6003                 ref->node.full_backref = 0;
6004         }
6005         ref->bytes = max_size;
6006         ref->found_ref = 0;
6007         ref->num_refs = 0;
6008         list_add_tail(&ref->node.list, &rec->backrefs);
6009         if (max_size > rec->max_size)
6010                 rec->max_size = max_size;
6011         return ref;
6012 }
6013
6014 /* Check if the type of extent matches with its chunk */
6015 static void check_extent_type(struct extent_record *rec)
6016 {
6017         struct btrfs_block_group_cache *bg_cache;
6018
6019         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6020         if (!bg_cache)
6021                 return;
6022
6023         /* data extent, check chunk directly*/
6024         if (!rec->metadata) {
6025                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6026                         rec->wrong_chunk_type = 1;
6027                 return;
6028         }
6029
6030         /* metadata extent, check the obvious case first */
6031         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6032                                  BTRFS_BLOCK_GROUP_METADATA))) {
6033                 rec->wrong_chunk_type = 1;
6034                 return;
6035         }
6036
6037         /*
6038          * Check SYSTEM extent, as it's also marked as metadata, we can only
6039          * make sure it's a SYSTEM extent by its backref
6040          */
6041         if (!list_empty(&rec->backrefs)) {
6042                 struct extent_backref *node;
6043                 struct tree_backref *tback;
6044                 u64 bg_type;
6045
6046                 node = to_extent_backref(rec->backrefs.next);
6047                 if (node->is_data) {
6048                         /* tree block shouldn't have data backref */
6049                         rec->wrong_chunk_type = 1;
6050                         return;
6051                 }
6052                 tback = container_of(node, struct tree_backref, node);
6053
6054                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6055                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6056                 else
6057                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6058                 if (!(bg_cache->flags & bg_type))
6059                         rec->wrong_chunk_type = 1;
6060         }
6061 }
6062
6063 /*
6064  * Allocate a new extent record, fill default values from @tmpl and insert int
6065  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6066  * the cache, otherwise it fails.
6067  */
6068 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6069                 struct extent_record *tmpl)
6070 {
6071         struct extent_record *rec;
6072         int ret = 0;
6073
6074         BUG_ON(tmpl->max_size == 0);
6075         rec = malloc(sizeof(*rec));
6076         if (!rec)
6077                 return -ENOMEM;
6078         rec->start = tmpl->start;
6079         rec->max_size = tmpl->max_size;
6080         rec->nr = max(tmpl->nr, tmpl->max_size);
6081         rec->found_rec = tmpl->found_rec;
6082         rec->content_checked = tmpl->content_checked;
6083         rec->owner_ref_checked = tmpl->owner_ref_checked;
6084         rec->num_duplicates = 0;
6085         rec->metadata = tmpl->metadata;
6086         rec->flag_block_full_backref = FLAG_UNSET;
6087         rec->bad_full_backref = 0;
6088         rec->crossing_stripes = 0;
6089         rec->wrong_chunk_type = 0;
6090         rec->is_root = tmpl->is_root;
6091         rec->refs = tmpl->refs;
6092         rec->extent_item_refs = tmpl->extent_item_refs;
6093         rec->parent_generation = tmpl->parent_generation;
6094         INIT_LIST_HEAD(&rec->backrefs);
6095         INIT_LIST_HEAD(&rec->dups);
6096         INIT_LIST_HEAD(&rec->list);
6097         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6098         rec->cache.start = tmpl->start;
6099         rec->cache.size = tmpl->nr;
6100         ret = insert_cache_extent(extent_cache, &rec->cache);
6101         if (ret) {
6102                 free(rec);
6103                 return ret;
6104         }
6105         bytes_used += rec->nr;
6106
6107         if (tmpl->metadata)
6108                 rec->crossing_stripes = check_crossing_stripes(global_info,
6109                                 rec->start, global_info->nodesize);
6110         check_extent_type(rec);
6111         return ret;
6112 }
6113
6114 /*
6115  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6116  * some are hints:
6117  * - refs              - if found, increase refs
6118  * - is_root           - if found, set
6119  * - content_checked   - if found, set
6120  * - owner_ref_checked - if found, set
6121  *
6122  * If not found, create a new one, initialize and insert.
6123  */
6124 static int add_extent_rec(struct cache_tree *extent_cache,
6125                 struct extent_record *tmpl)
6126 {
6127         struct extent_record *rec;
6128         struct cache_extent *cache;
6129         int ret = 0;
6130         int dup = 0;
6131
6132         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6133         if (cache) {
6134                 rec = container_of(cache, struct extent_record, cache);
6135                 if (tmpl->refs)
6136                         rec->refs++;
6137                 if (rec->nr == 1)
6138                         rec->nr = max(tmpl->nr, tmpl->max_size);
6139
6140                 /*
6141                  * We need to make sure to reset nr to whatever the extent
6142                  * record says was the real size, this way we can compare it to
6143                  * the backrefs.
6144                  */
6145                 if (tmpl->found_rec) {
6146                         if (tmpl->start != rec->start || rec->found_rec) {
6147                                 struct extent_record *tmp;
6148
6149                                 dup = 1;
6150                                 if (list_empty(&rec->list))
6151                                         list_add_tail(&rec->list,
6152                                                       &duplicate_extents);
6153
6154                                 /*
6155                                  * We have to do this song and dance in case we
6156                                  * find an extent record that falls inside of
6157                                  * our current extent record but does not have
6158                                  * the same objectid.
6159                                  */
6160                                 tmp = malloc(sizeof(*tmp));
6161                                 if (!tmp)
6162                                         return -ENOMEM;
6163                                 tmp->start = tmpl->start;
6164                                 tmp->max_size = tmpl->max_size;
6165                                 tmp->nr = tmpl->nr;
6166                                 tmp->found_rec = 1;
6167                                 tmp->metadata = tmpl->metadata;
6168                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6169                                 INIT_LIST_HEAD(&tmp->list);
6170                                 list_add_tail(&tmp->list, &rec->dups);
6171                                 rec->num_duplicates++;
6172                         } else {
6173                                 rec->nr = tmpl->nr;
6174                                 rec->found_rec = 1;
6175                         }
6176                 }
6177
6178                 if (tmpl->extent_item_refs && !dup) {
6179                         if (rec->extent_item_refs) {
6180                                 fprintf(stderr, "block %llu rec "
6181                                         "extent_item_refs %llu, passed %llu\n",
6182                                         (unsigned long long)tmpl->start,
6183                                         (unsigned long long)
6184                                                         rec->extent_item_refs,
6185                                         (unsigned long long)tmpl->extent_item_refs);
6186                         }
6187                         rec->extent_item_refs = tmpl->extent_item_refs;
6188                 }
6189                 if (tmpl->is_root)
6190                         rec->is_root = 1;
6191                 if (tmpl->content_checked)
6192                         rec->content_checked = 1;
6193                 if (tmpl->owner_ref_checked)
6194                         rec->owner_ref_checked = 1;
6195                 memcpy(&rec->parent_key, &tmpl->parent_key,
6196                                 sizeof(tmpl->parent_key));
6197                 if (tmpl->parent_generation)
6198                         rec->parent_generation = tmpl->parent_generation;
6199                 if (rec->max_size < tmpl->max_size)
6200                         rec->max_size = tmpl->max_size;
6201
6202                 /*
6203                  * A metadata extent can't cross stripe_len boundary, otherwise
6204                  * kernel scrub won't be able to handle it.
6205                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6206                  * it.
6207                  */
6208                 if (tmpl->metadata)
6209                         rec->crossing_stripes = check_crossing_stripes(
6210                                         global_info, rec->start,
6211                                         global_info->nodesize);
6212                 check_extent_type(rec);
6213                 maybe_free_extent_rec(extent_cache, rec);
6214                 return ret;
6215         }
6216
6217         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6218
6219         return ret;
6220 }
6221
6222 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6223                             u64 parent, u64 root, int found_ref)
6224 {
6225         struct extent_record *rec;
6226         struct tree_backref *back;
6227         struct cache_extent *cache;
6228         int ret;
6229
6230         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6231         if (!cache) {
6232                 struct extent_record tmpl;
6233
6234                 memset(&tmpl, 0, sizeof(tmpl));
6235                 tmpl.start = bytenr;
6236                 tmpl.nr = 1;
6237                 tmpl.metadata = 1;
6238                 tmpl.max_size = 1;
6239
6240                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6241                 if (ret)
6242                         return ret;
6243
6244                 /* really a bug in cache_extent implement now */
6245                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6246                 if (!cache)
6247                         return -ENOENT;
6248         }
6249
6250         rec = container_of(cache, struct extent_record, cache);
6251         if (rec->start != bytenr) {
6252                 /*
6253                  * Several cause, from unaligned bytenr to over lapping extents
6254                  */
6255                 return -EEXIST;
6256         }
6257
6258         back = find_tree_backref(rec, parent, root);
6259         if (!back) {
6260                 back = alloc_tree_backref(rec, parent, root);
6261                 if (!back)
6262                         return -ENOMEM;
6263         }
6264
6265         if (found_ref) {
6266                 if (back->node.found_ref) {
6267                         fprintf(stderr, "Extent back ref already exists "
6268                                 "for %llu parent %llu root %llu \n",
6269                                 (unsigned long long)bytenr,
6270                                 (unsigned long long)parent,
6271                                 (unsigned long long)root);
6272                 }
6273                 back->node.found_ref = 1;
6274         } else {
6275                 if (back->node.found_extent_tree) {
6276                         fprintf(stderr, "Extent back ref already exists "
6277                                 "for %llu parent %llu root %llu \n",
6278                                 (unsigned long long)bytenr,
6279                                 (unsigned long long)parent,
6280                                 (unsigned long long)root);
6281                 }
6282                 back->node.found_extent_tree = 1;
6283         }
6284         check_extent_type(rec);
6285         maybe_free_extent_rec(extent_cache, rec);
6286         return 0;
6287 }
6288
6289 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6290                             u64 parent, u64 root, u64 owner, u64 offset,
6291                             u32 num_refs, int found_ref, u64 max_size)
6292 {
6293         struct extent_record *rec;
6294         struct data_backref *back;
6295         struct cache_extent *cache;
6296         int ret;
6297
6298         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6299         if (!cache) {
6300                 struct extent_record tmpl;
6301
6302                 memset(&tmpl, 0, sizeof(tmpl));
6303                 tmpl.start = bytenr;
6304                 tmpl.nr = 1;
6305                 tmpl.max_size = max_size;
6306
6307                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6308                 if (ret)
6309                         return ret;
6310
6311                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6312                 if (!cache)
6313                         abort();
6314         }
6315
6316         rec = container_of(cache, struct extent_record, cache);
6317         if (rec->max_size < max_size)
6318                 rec->max_size = max_size;
6319
6320         /*
6321          * If found_ref is set then max_size is the real size and must match the
6322          * existing refs.  So if we have already found a ref then we need to
6323          * make sure that this ref matches the existing one, otherwise we need
6324          * to add a new backref so we can notice that the backrefs don't match
6325          * and we need to figure out who is telling the truth.  This is to
6326          * account for that awful fsync bug I introduced where we'd end up with
6327          * a btrfs_file_extent_item that would have its length include multiple
6328          * prealloc extents or point inside of a prealloc extent.
6329          */
6330         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6331                                  bytenr, max_size);
6332         if (!back) {
6333                 back = alloc_data_backref(rec, parent, root, owner, offset,
6334                                           max_size);
6335                 BUG_ON(!back);
6336         }
6337
6338         if (found_ref) {
6339                 BUG_ON(num_refs != 1);
6340                 if (back->node.found_ref)
6341                         BUG_ON(back->bytes != max_size);
6342                 back->node.found_ref = 1;
6343                 back->found_ref += 1;
6344                 back->bytes = max_size;
6345                 back->disk_bytenr = bytenr;
6346                 rec->refs += 1;
6347                 rec->content_checked = 1;
6348                 rec->owner_ref_checked = 1;
6349         } else {
6350                 if (back->node.found_extent_tree) {
6351                         fprintf(stderr, "Extent back ref already exists "
6352                                 "for %llu parent %llu root %llu "
6353                                 "owner %llu offset %llu num_refs %lu\n",
6354                                 (unsigned long long)bytenr,
6355                                 (unsigned long long)parent,
6356                                 (unsigned long long)root,
6357                                 (unsigned long long)owner,
6358                                 (unsigned long long)offset,
6359                                 (unsigned long)num_refs);
6360                 }
6361                 back->num_refs = num_refs;
6362                 back->node.found_extent_tree = 1;
6363         }
6364         maybe_free_extent_rec(extent_cache, rec);
6365         return 0;
6366 }
6367
6368 static int add_pending(struct cache_tree *pending,
6369                        struct cache_tree *seen, u64 bytenr, u32 size)
6370 {
6371         int ret;
6372         ret = add_cache_extent(seen, bytenr, size);
6373         if (ret)
6374                 return ret;
6375         add_cache_extent(pending, bytenr, size);
6376         return 0;
6377 }
6378
6379 static int pick_next_pending(struct cache_tree *pending,
6380                         struct cache_tree *reada,
6381                         struct cache_tree *nodes,
6382                         u64 last, struct block_info *bits, int bits_nr,
6383                         int *reada_bits)
6384 {
6385         unsigned long node_start = last;
6386         struct cache_extent *cache;
6387         int ret;
6388
6389         cache = search_cache_extent(reada, 0);
6390         if (cache) {
6391                 bits[0].start = cache->start;
6392                 bits[0].size = cache->size;
6393                 *reada_bits = 1;
6394                 return 1;
6395         }
6396         *reada_bits = 0;
6397         if (node_start > 32768)
6398                 node_start -= 32768;
6399
6400         cache = search_cache_extent(nodes, node_start);
6401         if (!cache)
6402                 cache = search_cache_extent(nodes, 0);
6403
6404         if (!cache) {
6405                  cache = search_cache_extent(pending, 0);
6406                  if (!cache)
6407                          return 0;
6408                  ret = 0;
6409                  do {
6410                          bits[ret].start = cache->start;
6411                          bits[ret].size = cache->size;
6412                          cache = next_cache_extent(cache);
6413                          ret++;
6414                  } while (cache && ret < bits_nr);
6415                  return ret;
6416         }
6417
6418         ret = 0;
6419         do {
6420                 bits[ret].start = cache->start;
6421                 bits[ret].size = cache->size;
6422                 cache = next_cache_extent(cache);
6423                 ret++;
6424         } while (cache && ret < bits_nr);
6425
6426         if (bits_nr - ret > 8) {
6427                 u64 lookup = bits[0].start + bits[0].size;
6428                 struct cache_extent *next;
6429                 next = search_cache_extent(pending, lookup);
6430                 while(next) {
6431                         if (next->start - lookup > 32768)
6432                                 break;
6433                         bits[ret].start = next->start;
6434                         bits[ret].size = next->size;
6435                         lookup = next->start + next->size;
6436                         ret++;
6437                         if (ret == bits_nr)
6438                                 break;
6439                         next = next_cache_extent(next);
6440                         if (!next)
6441                                 break;
6442                 }
6443         }
6444         return ret;
6445 }
6446
6447 static void free_chunk_record(struct cache_extent *cache)
6448 {
6449         struct chunk_record *rec;
6450
6451         rec = container_of(cache, struct chunk_record, cache);
6452         list_del_init(&rec->list);
6453         list_del_init(&rec->dextents);
6454         free(rec);
6455 }
6456
6457 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6458 {
6459         cache_tree_free_extents(chunk_cache, free_chunk_record);
6460 }
6461
6462 static void free_device_record(struct rb_node *node)
6463 {
6464         struct device_record *rec;
6465
6466         rec = container_of(node, struct device_record, node);
6467         free(rec);
6468 }
6469
6470 FREE_RB_BASED_TREE(device_cache, free_device_record);
6471
6472 int insert_block_group_record(struct block_group_tree *tree,
6473                               struct block_group_record *bg_rec)
6474 {
6475         int ret;
6476
6477         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6478         if (ret)
6479                 return ret;
6480
6481         list_add_tail(&bg_rec->list, &tree->block_groups);
6482         return 0;
6483 }
6484
6485 static void free_block_group_record(struct cache_extent *cache)
6486 {
6487         struct block_group_record *rec;
6488
6489         rec = container_of(cache, struct block_group_record, cache);
6490         list_del_init(&rec->list);
6491         free(rec);
6492 }
6493
6494 void free_block_group_tree(struct block_group_tree *tree)
6495 {
6496         cache_tree_free_extents(&tree->tree, free_block_group_record);
6497 }
6498
6499 int insert_device_extent_record(struct device_extent_tree *tree,
6500                                 struct device_extent_record *de_rec)
6501 {
6502         int ret;
6503
6504         /*
6505          * Device extent is a bit different from the other extents, because
6506          * the extents which belong to the different devices may have the
6507          * same start and size, so we need use the special extent cache
6508          * search/insert functions.
6509          */
6510         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6511         if (ret)
6512                 return ret;
6513
6514         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6515         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6516         return 0;
6517 }
6518
6519 static void free_device_extent_record(struct cache_extent *cache)
6520 {
6521         struct device_extent_record *rec;
6522
6523         rec = container_of(cache, struct device_extent_record, cache);
6524         if (!list_empty(&rec->chunk_list))
6525                 list_del_init(&rec->chunk_list);
6526         if (!list_empty(&rec->device_list))
6527                 list_del_init(&rec->device_list);
6528         free(rec);
6529 }
6530
6531 void free_device_extent_tree(struct device_extent_tree *tree)
6532 {
6533         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6534 }
6535
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref in slot @slot of @leaf into a backref in
 * @extent_cache.  Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are
 * recorded as tree backrefs, all others as data backrefs carrying the v0 ref
 * count.  In both cases the item key supplies bytenr (objectid) and parent
 * (offset).
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 *
 * NOTE(review): the data path passes max_size == 0.  If no record covers the
 * extent yet, add_data_backref() forwards that value to
 * add_extent_rec_nolookup(), which BUG_ON()s on a zero max_size - confirm
 * that a record always exists before this is reached.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6556
6557 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6558                                             struct btrfs_key *key,
6559                                             int slot)
6560 {
6561         struct btrfs_chunk *ptr;
6562         struct chunk_record *rec;
6563         int num_stripes, i;
6564
6565         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6566         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6567
6568         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6569         if (!rec) {
6570                 fprintf(stderr, "memory allocation failed\n");
6571                 exit(-1);
6572         }
6573
6574         INIT_LIST_HEAD(&rec->list);
6575         INIT_LIST_HEAD(&rec->dextents);
6576         rec->bg_rec = NULL;
6577
6578         rec->cache.start = key->offset;
6579         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6580
6581         rec->generation = btrfs_header_generation(leaf);
6582
6583         rec->objectid = key->objectid;
6584         rec->type = key->type;
6585         rec->offset = key->offset;
6586
6587         rec->length = rec->cache.size;
6588         rec->owner = btrfs_chunk_owner(leaf, ptr);
6589         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6590         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6591         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6592         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6593         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6594         rec->num_stripes = num_stripes;
6595         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6596
6597         for (i = 0; i < rec->num_stripes; ++i) {
6598                 rec->stripes[i].devid =
6599                         btrfs_stripe_devid_nr(leaf, ptr, i);
6600                 rec->stripes[i].offset =
6601                         btrfs_stripe_offset_nr(leaf, ptr, i);
6602                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6603                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6604                                 BTRFS_UUID_SIZE);
6605         }
6606
6607         return rec;
6608 }
6609
6610 static int process_chunk_item(struct cache_tree *chunk_cache,
6611                               struct btrfs_key *key, struct extent_buffer *eb,
6612                               int slot)
6613 {
6614         struct chunk_record *rec;
6615         struct btrfs_chunk *chunk;
6616         int ret = 0;
6617
6618         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6619         /*
6620          * Do extra check for this chunk item,
6621          *
6622          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6623          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6624          * and owner<->key_type check.
6625          */
6626         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6627                                       key->offset);
6628         if (ret < 0) {
6629                 error("chunk(%llu, %llu) is not valid, ignore it",
6630                       key->offset, btrfs_chunk_length(eb, chunk));
6631                 return 0;
6632         }
6633         rec = btrfs_new_chunk_record(eb, key, slot);
6634         ret = insert_cache_extent(chunk_cache, &rec->cache);
6635         if (ret) {
6636                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6637                         rec->offset, rec->length);
6638                 free(rec);
6639         }
6640
6641         return ret;
6642 }
6643
6644 static int process_device_item(struct rb_root *dev_cache,
6645                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6646 {
6647         struct btrfs_dev_item *ptr;
6648         struct device_record *rec;
6649         int ret = 0;
6650
6651         ptr = btrfs_item_ptr(eb,
6652                 slot, struct btrfs_dev_item);
6653
6654         rec = malloc(sizeof(*rec));
6655         if (!rec) {
6656                 fprintf(stderr, "memory allocation failed\n");
6657                 return -ENOMEM;
6658         }
6659
6660         rec->devid = key->offset;
6661         rec->generation = btrfs_header_generation(eb);
6662
6663         rec->objectid = key->objectid;
6664         rec->type = key->type;
6665         rec->offset = key->offset;
6666
6667         rec->devid = btrfs_device_id(eb, ptr);
6668         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6669         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6670
6671         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6672         if (ret) {
6673                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6674                 free(rec);
6675         }
6676
6677         return ret;
6678 }
6679
6680 struct block_group_record *
6681 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6682                              int slot)
6683 {
6684         struct btrfs_block_group_item *ptr;
6685         struct block_group_record *rec;
6686
6687         rec = calloc(1, sizeof(*rec));
6688         if (!rec) {
6689                 fprintf(stderr, "memory allocation failed\n");
6690                 exit(-1);
6691         }
6692
6693         rec->cache.start = key->objectid;
6694         rec->cache.size = key->offset;
6695
6696         rec->generation = btrfs_header_generation(leaf);
6697
6698         rec->objectid = key->objectid;
6699         rec->type = key->type;
6700         rec->offset = key->offset;
6701
6702         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6703         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6704
6705         INIT_LIST_HEAD(&rec->list);
6706
6707         return rec;
6708 }
6709
6710 static int process_block_group_item(struct block_group_tree *block_group_cache,
6711                                     struct btrfs_key *key,
6712                                     struct extent_buffer *eb, int slot)
6713 {
6714         struct block_group_record *rec;
6715         int ret = 0;
6716
6717         rec = btrfs_new_block_group_record(eb, key, slot);
6718         ret = insert_block_group_record(block_group_cache, rec);
6719         if (ret) {
6720                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6721                         rec->objectid, rec->offset);
6722                 free(rec);
6723         }
6724
6725         return ret;
6726 }
6727
6728 struct device_extent_record *
6729 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6730                                struct btrfs_key *key, int slot)
6731 {
6732         struct device_extent_record *rec;
6733         struct btrfs_dev_extent *ptr;
6734
6735         rec = calloc(1, sizeof(*rec));
6736         if (!rec) {
6737                 fprintf(stderr, "memory allocation failed\n");
6738                 exit(-1);
6739         }
6740
6741         rec->cache.objectid = key->objectid;
6742         rec->cache.start = key->offset;
6743
6744         rec->generation = btrfs_header_generation(leaf);
6745
6746         rec->objectid = key->objectid;
6747         rec->type = key->type;
6748         rec->offset = key->offset;
6749
6750         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6751         rec->chunk_objecteid =
6752                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6753         rec->chunk_offset =
6754                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6755         rec->length = btrfs_dev_extent_length(leaf, ptr);
6756         rec->cache.size = rec->length;
6757
6758         INIT_LIST_HEAD(&rec->chunk_list);
6759         INIT_LIST_HEAD(&rec->device_list);
6760
6761         return rec;
6762 }
6763
6764 static int
6765 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6766                            struct btrfs_key *key, struct extent_buffer *eb,
6767                            int slot)
6768 {
6769         struct device_extent_record *rec;
6770         int ret;
6771
6772         rec = btrfs_new_device_extent_record(eb, key, slot);
6773         ret = insert_device_extent_record(dev_extent_cache, rec);
6774         if (ret) {
6775                 fprintf(stderr,
6776                         "Device extent[%llu, %llu, %llu] existed.\n",
6777                         rec->objectid, rec->offset, rec->length);
6778                 free(rec);
6779         }
6780
6781         return ret;
6782 }
6783
6784 static int process_extent_item(struct btrfs_root *root,
6785                                struct cache_tree *extent_cache,
6786                                struct extent_buffer *eb, int slot)
6787 {
6788         struct btrfs_extent_item *ei;
6789         struct btrfs_extent_inline_ref *iref;
6790         struct btrfs_extent_data_ref *dref;
6791         struct btrfs_shared_data_ref *sref;
6792         struct btrfs_key key;
6793         struct extent_record tmpl;
6794         unsigned long end;
6795         unsigned long ptr;
6796         int ret;
6797         int type;
6798         u32 item_size = btrfs_item_size_nr(eb, slot);
6799         u64 refs = 0;
6800         u64 offset;
6801         u64 num_bytes;
6802         int metadata = 0;
6803
6804         btrfs_item_key_to_cpu(eb, &key, slot);
6805
6806         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6807                 metadata = 1;
6808                 num_bytes = root->fs_info->nodesize;
6809         } else {
6810                 num_bytes = key.offset;
6811         }
6812
6813         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6814                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6815                       key.objectid, root->fs_info->sectorsize);
6816                 return -EIO;
6817         }
6818         if (item_size < sizeof(*ei)) {
6819 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6820                 struct btrfs_extent_item_v0 *ei0;
6821                 BUG_ON(item_size != sizeof(*ei0));
6822                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6823                 refs = btrfs_extent_refs_v0(eb, ei0);
6824 #else
6825                 BUG();
6826 #endif
6827                 memset(&tmpl, 0, sizeof(tmpl));
6828                 tmpl.start = key.objectid;
6829                 tmpl.nr = num_bytes;
6830                 tmpl.extent_item_refs = refs;
6831                 tmpl.metadata = metadata;
6832                 tmpl.found_rec = 1;
6833                 tmpl.max_size = num_bytes;
6834
6835                 return add_extent_rec(extent_cache, &tmpl);
6836         }
6837
6838         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6839         refs = btrfs_extent_refs(eb, ei);
6840         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6841                 metadata = 1;
6842         else
6843                 metadata = 0;
6844         if (metadata && num_bytes != root->fs_info->nodesize) {
6845                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6846                       num_bytes, root->fs_info->nodesize);
6847                 return -EIO;
6848         }
6849         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6850                 error("ignore invalid data extent, length %llu is not aligned to %u",
6851                       num_bytes, root->fs_info->sectorsize);
6852                 return -EIO;
6853         }
6854
6855         memset(&tmpl, 0, sizeof(tmpl));
6856         tmpl.start = key.objectid;
6857         tmpl.nr = num_bytes;
6858         tmpl.extent_item_refs = refs;
6859         tmpl.metadata = metadata;
6860         tmpl.found_rec = 1;
6861         tmpl.max_size = num_bytes;
6862         add_extent_rec(extent_cache, &tmpl);
6863
6864         ptr = (unsigned long)(ei + 1);
6865         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6866             key.type == BTRFS_EXTENT_ITEM_KEY)
6867                 ptr += sizeof(struct btrfs_tree_block_info);
6868
6869         end = (unsigned long)ei + item_size;
6870         while (ptr < end) {
6871                 iref = (struct btrfs_extent_inline_ref *)ptr;
6872                 type = btrfs_extent_inline_ref_type(eb, iref);
6873                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6874                 switch (type) {
6875                 case BTRFS_TREE_BLOCK_REF_KEY:
6876                         ret = add_tree_backref(extent_cache, key.objectid,
6877                                         0, offset, 0);
6878                         if (ret < 0)
6879                                 error(
6880                         "add_tree_backref failed (extent items tree block): %s",
6881                                       strerror(-ret));
6882                         break;
6883                 case BTRFS_SHARED_BLOCK_REF_KEY:
6884                         ret = add_tree_backref(extent_cache, key.objectid,
6885                                         offset, 0, 0);
6886                         if (ret < 0)
6887                                 error(
6888                         "add_tree_backref failed (extent items shared block): %s",
6889                                       strerror(-ret));
6890                         break;
6891                 case BTRFS_EXTENT_DATA_REF_KEY:
6892                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6893                         add_data_backref(extent_cache, key.objectid, 0,
6894                                         btrfs_extent_data_ref_root(eb, dref),
6895                                         btrfs_extent_data_ref_objectid(eb,
6896                                                                        dref),
6897                                         btrfs_extent_data_ref_offset(eb, dref),
6898                                         btrfs_extent_data_ref_count(eb, dref),
6899                                         0, num_bytes);
6900                         break;
6901                 case BTRFS_SHARED_DATA_REF_KEY:
6902                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6903                         add_data_backref(extent_cache, key.objectid, offset,
6904                                         0, 0, 0,
6905                                         btrfs_shared_data_ref_count(eb, sref),
6906                                         0, num_bytes);
6907                         break;
6908                 default:
6909                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6910                                 key.objectid, key.type, num_bytes);
6911                         goto out;
6912                 }
6913                 ptr += btrfs_extent_inline_ref_size(type);
6914         }
6915         WARN_ON(ptr > end);
6916 out:
6917         return 0;
6918 }
6919
/*
 * Check that the range [offset, offset + bytes) of block group @cache is
 * described by exactly one entry of the loaded free space cache, and remove
 * that entry from cache->free_space_ctl once it has been matched.
 *
 * Before looking the range up, every super block mirror is mapped back into
 * the block group with btrfs_rmap_block() and the stripes that overlap the
 * range are cut out of it (presumably because super block stripes are never
 * recorded as free space -- confirm against the free space cache writer).
 *
 * Returns 0 if the (trimmed) range matched a free space entry or was fully
 * covered by a super stripe, -EINVAL if no entry exists or the entry has a
 * different offset/length, or the error returned by btrfs_rmap_block().
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                /* On success @logical holds @nr addresses and is freed below */
                ret = btrfs_rmap_block(root->fs_info,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                while (nr--) {
                        /* Stripe ends before the range starts: no overlap */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        /* Stripe starts after the range ends: no overlap */
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* Stripe covers the whole range, nothing left */
                                if (stripe_len >= bytes) {
                                        free(logical);
                                        return 0;
                                }
                                /* Stripe sits at the front, skip past it */
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* Stripe starts before and covers the range */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        free(logical);
                                        return 0;
                                }
                                /* Keep only the part behind the stripe */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        free(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                free(logical);
        }

        /* What is left of the range must match one cache entry exactly */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /*
         * Consume the verified entry so that the caller can detect entries
         * that were never matched by any range.
         */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
7013
7014 static int verify_space_cache(struct btrfs_root *root,
7015                               struct btrfs_block_group_cache *cache)
7016 {
7017         struct btrfs_path path;
7018         struct extent_buffer *leaf;
7019         struct btrfs_key key;
7020         u64 last;
7021         int ret = 0;
7022
7023         root = root->fs_info->extent_root;
7024
7025         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7026
7027         btrfs_init_path(&path);
7028         key.objectid = last;
7029         key.offset = 0;
7030         key.type = BTRFS_EXTENT_ITEM_KEY;
7031         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7032         if (ret < 0)
7033                 goto out;
7034         ret = 0;
7035         while (1) {
7036                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7037                         ret = btrfs_next_leaf(root, &path);
7038                         if (ret < 0)
7039                                 goto out;
7040                         if (ret > 0) {
7041                                 ret = 0;
7042                                 break;
7043                         }
7044                 }
7045                 leaf = path.nodes[0];
7046                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7047                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7048                         break;
7049                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7050                     key.type != BTRFS_METADATA_ITEM_KEY) {
7051                         path.slots[0]++;
7052                         continue;
7053                 }
7054
7055                 if (last == key.objectid) {
7056                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7057                                 last = key.objectid + key.offset;
7058                         else
7059                                 last = key.objectid + root->fs_info->nodesize;
7060                         path.slots[0]++;
7061                         continue;
7062                 }
7063
7064                 ret = check_cache_range(root, cache, last,
7065                                         key.objectid - last);
7066                 if (ret)
7067                         break;
7068                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7069                         last = key.objectid + key.offset;
7070                 else
7071                         last = key.objectid + root->fs_info->nodesize;
7072                 path.slots[0]++;
7073         }
7074
7075         if (last < cache->key.objectid + cache->key.offset)
7076                 ret = check_cache_range(root, cache, last,
7077                                         cache->key.objectid +
7078                                         cache->key.offset - last);
7079
7080 out:
7081         btrfs_release_path(&path);
7082
7083         if (!ret &&
7084             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7085                 fprintf(stderr, "There are still entries left in the space "
7086                         "cache\n");
7087                 ret = -EINVAL;
7088         }
7089
7090         return ret;
7091 }
7092
7093 static int check_space_cache(struct btrfs_root *root)
7094 {
7095         struct btrfs_block_group_cache *cache;
7096         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7097         int ret;
7098         int error = 0;
7099
7100         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7101             btrfs_super_generation(root->fs_info->super_copy) !=
7102             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7103                 printf("cache and super generation don't match, space cache "
7104                        "will be invalidated\n");
7105                 return 0;
7106         }
7107
7108         if (ctx.progress_enabled) {
7109                 ctx.tp = TASK_FREE_SPACE;
7110                 task_start(ctx.info);
7111         }
7112
7113         while (1) {
7114                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7115                 if (!cache)
7116                         break;
7117
7118                 start = cache->key.objectid + cache->key.offset;
7119                 if (!cache->free_space_ctl) {
7120                         if (btrfs_init_free_space_ctl(cache,
7121                                                 root->fs_info->sectorsize)) {
7122                                 ret = -ENOMEM;
7123                                 break;
7124                         }
7125                 } else {
7126                         btrfs_remove_free_space_cache(cache);
7127                 }
7128
7129                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7130                         ret = exclude_super_stripes(root, cache);
7131                         if (ret) {
7132                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7133                                         strerror(-ret));
7134                                 error++;
7135                                 continue;
7136                         }
7137                         ret = load_free_space_tree(root->fs_info, cache);
7138                         free_excluded_extents(root, cache);
7139                         if (ret < 0) {
7140                                 fprintf(stderr, "could not load free space tree: %s\n",
7141                                         strerror(-ret));
7142                                 error++;
7143                                 continue;
7144                         }
7145                         error += ret;
7146                 } else {
7147                         ret = load_free_space_cache(root->fs_info, cache);
7148                         if (!ret)
7149                                 continue;
7150                 }
7151
7152                 ret = verify_space_cache(root, cache);
7153                 if (ret) {
7154                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7155                                 cache->key.objectid);
7156                         error++;
7157                 }
7158         }
7159
7160         task_stop(ctx.info);
7161
7162         return error ? -EINVAL : 0;
7163 }
7164
7165 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7166                         u64 num_bytes, unsigned long leaf_offset,
7167                         struct extent_buffer *eb) {
7168
7169         u64 offset = 0;
7170         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7171         char *data;
7172         unsigned long csum_offset;
7173         u32 csum;
7174         u32 csum_expected;
7175         u64 read_len;
7176         u64 data_checked = 0;
7177         u64 tmp;
7178         int ret = 0;
7179         int mirror;
7180         int num_copies;
7181
7182         if (num_bytes % root->fs_info->sectorsize)
7183                 return -EINVAL;
7184
7185         data = malloc(num_bytes);
7186         if (!data)
7187                 return -ENOMEM;
7188
7189         while (offset < num_bytes) {
7190                 mirror = 0;
7191 again:
7192                 read_len = num_bytes - offset;
7193                 /* read as much space once a time */
7194                 ret = read_extent_data(root, data + offset,
7195                                 bytenr + offset, &read_len, mirror);
7196                 if (ret)
7197                         goto out;
7198                 data_checked = 0;
7199                 /* verify every 4k data's checksum */
7200                 while (data_checked < read_len) {
7201                         csum = ~(u32)0;
7202                         tmp = offset + data_checked;
7203
7204                         csum = btrfs_csum_data((char *)data + tmp,
7205                                                csum, root->fs_info->sectorsize);
7206                         btrfs_csum_final(csum, (u8 *)&csum);
7207
7208                         csum_offset = leaf_offset +
7209                                  tmp / root->fs_info->sectorsize * csum_size;
7210                         read_extent_buffer(eb, (char *)&csum_expected,
7211                                            csum_offset, csum_size);
7212                         /* try another mirror */
7213                         if (csum != csum_expected) {
7214                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7215                                                 mirror, bytenr + tmp,
7216                                                 csum, csum_expected);
7217                                 num_copies = btrfs_num_copies(root->fs_info,
7218                                                 bytenr, num_bytes);
7219                                 if (mirror < num_copies - 1) {
7220                                         mirror += 1;
7221                                         goto again;
7222                                 }
7223                         }
7224                         data_checked += root->fs_info->sectorsize;
7225                 }
7226                 offset += read_len;
7227         }
7228 out:
7229         free(data);
7230         return ret;
7231 }
7232
/*
 * Check that the byte range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEM records in the extent tree.
 *
 * @root is only used to reach fs_info->extent_root and as the root argument
 * of the leaf stepping helpers.  Only items of type BTRFS_EXTENT_ITEM_KEY are
 * considered; every other item type is skipped.
 *
 * Returns 0 if the whole range is covered, 1 if some part of it has no extent
 * record (a message is printed), or a negative value if walking the tree
 * failed.
 */
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes)
{
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        int ret;

        btrfs_init_path(&path);
        /* Largest possible key for this bytenr, so we land behind its items */
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

again:
        /*
         * NOTE(review): when reached through "goto again" below, @key holds
         * the key of the extent item found in the middle of the range, not
         * the initial (bytenr, EXTENT_ITEM, -1) key -- confirm this is the
         * intended restart point.
         */
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
                                0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error looking up extent record %d\n", ret);
                btrfs_release_path(&path);
                return ret;
        } else if (ret) {
                /* No exact match: step back to the item before the slot */
                if (path.slots[0] > 0) {
                        path.slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, &path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                /* No previous leaf: nothing covers the range */
                                ret = 0;
                                goto out;
                        }
                }
        }

        btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);

        /*
         * Block group items come before extent items if they have the same
         * bytenr, so walk back one more just in case.  Dear future traveller,
         * first congrats on mastering time travel.  Now if it's not too much
         * trouble could you go back to 2006 and tell Chris to make the
         * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
         * EXTENT_ITEM_KEY please?
         */
        while (key.type > BTRFS_EXTENT_ITEM_KEY) {
                if (path.slots[0] > 0) {
                        path.slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, &path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                ret = 0;
                                goto out;
                        }
                }
                btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
        }

        /*
         * Walk forward and shrink [bytenr, bytenr + num_bytes) by every
         * extent item overlapping it until nothing is left.
         */
        while (num_bytes) {
                if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                        ret = btrfs_next_leaf(root, &path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf "
                                        "%d\n", ret);
                                btrfs_release_path(&path);
                                return ret;
                        } else if (ret) {
                                /* End of the tree with bytes still uncovered */
                                break;
                        }
                }
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                if (key.type != BTRFS_EXTENT_ITEM_KEY) {
                        path.slots[0]++;
                        continue;
                }
                /* Extent ends before our range starts */
                if (key.objectid + key.offset < bytenr) {
                        path.slots[0]++;
                        continue;
                }
                /* Extent starts behind our range, nothing more can overlap */
                if (key.objectid > bytenr + num_bytes)
                        break;

                if (key.objectid == bytenr) {
                        /* Extent starts exactly at the range start */
                        if (key.offset >= num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes -= key.offset;
                        bytenr += key.offset;
                } else if (key.objectid < bytenr) {
                        /* Extent starts before the range and reaches into it */
                        if (key.objectid + key.offset >= bytenr + num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes = (bytenr + num_bytes) -
                                (key.objectid + key.offset);
                        bytenr = key.objectid + key.offset;
                } else {
                        /* Extent starts inside the range */
                        if (key.objectid + key.offset < bytenr + num_bytes) {
                                u64 new_start = key.objectid + key.offset;
                                u64 new_bytes = bytenr + num_bytes - new_start;

                                /*
                                 * Weird case, the extent is in the middle of
                                 * our range, we'll have to search one side
                                 * and then the other.  Not sure if this happens
                                 * in real life, but no harm in coding it up
                                 * anyway just in case.
                                 */
                                btrfs_release_path(&path);
                                ret = check_extent_exists(root, new_start,
                                                          new_bytes);
                                if (ret) {
                                        fprintf(stderr, "Right section didn't "
                                                "have a record\n");
                                        break;
                                }
                                /* Right side is fine, now redo the left side */
                                num_bytes = key.objectid - bytenr;
                                goto again;
                        }
                        /* Extent covers the tail, only the head is left */
                        num_bytes = key.objectid - bytenr;
                }
                path.slots[0]++;
        }
        /* Leftover num_bytes is turned into the "no extents" result below */
        ret = 0;

out:
        if (num_bytes && !ret) {
                fprintf(stderr, "There are no extents for csum range "
                        "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
                ret = 1;
        }

        btrfs_release_path(&path);
        return ret;
}
7371
7372 static int check_csums(struct btrfs_root *root)
7373 {
7374         struct btrfs_path path;
7375         struct extent_buffer *leaf;
7376         struct btrfs_key key;
7377         u64 offset = 0, num_bytes = 0;
7378         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7379         int errors = 0;
7380         int ret;
7381         u64 data_len;
7382         unsigned long leaf_offset;
7383
7384         root = root->fs_info->csum_root;
7385         if (!extent_buffer_uptodate(root->node)) {
7386                 fprintf(stderr, "No valid csum tree found\n");
7387                 return -ENOENT;
7388         }
7389
7390         btrfs_init_path(&path);
7391         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7392         key.type = BTRFS_EXTENT_CSUM_KEY;
7393         key.offset = 0;
7394         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7395         if (ret < 0) {
7396                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7397                 btrfs_release_path(&path);
7398                 return ret;
7399         }
7400
7401         if (ret > 0 && path.slots[0])
7402                 path.slots[0]--;
7403         ret = 0;
7404
7405         while (1) {
7406                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7407                         ret = btrfs_next_leaf(root, &path);
7408                         if (ret < 0) {
7409                                 fprintf(stderr, "Error going to next leaf "
7410                                         "%d\n", ret);
7411                                 break;
7412                         }
7413                         if (ret)
7414                                 break;
7415                 }
7416                 leaf = path.nodes[0];
7417
7418                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7419                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7420                         path.slots[0]++;
7421                         continue;
7422                 }
7423
7424                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7425                               csum_size) * root->fs_info->sectorsize;
7426                 if (!check_data_csum)
7427                         goto skip_csum_check;
7428                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7429                 ret = check_extent_csums(root, key.offset, data_len,
7430                                          leaf_offset, leaf);
7431                 if (ret)
7432                         break;
7433 skip_csum_check:
7434                 if (!num_bytes) {
7435                         offset = key.offset;
7436                 } else if (key.offset != offset + num_bytes) {
7437                         ret = check_extent_exists(root, offset, num_bytes);
7438                         if (ret) {
7439                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7440                                         "there is no extent record\n",
7441                                         offset, offset+num_bytes);
7442                                 errors++;
7443                         }
7444                         offset = key.offset;
7445                         num_bytes = 0;
7446                 }
7447                 num_bytes += data_len;
7448                 path.slots[0]++;
7449         }
7450
7451         btrfs_release_path(&path);
7452         return errors;
7453 }
7454
7455 static int is_dropped_key(struct btrfs_key *key,
7456                           struct btrfs_key *drop_key) {
7457         if (key->objectid < drop_key->objectid)
7458                 return 1;
7459         else if (key->objectid == drop_key->objectid) {
7460                 if (key->type < drop_key->type)
7461                         return 1;
7462                 else if (key->type == drop_key->type) {
7463                         if (key->offset < drop_key->offset)
7464                                 return 1;
7465                 }
7466         }
7467         return 0;
7468 }
7469
7470 /*
7471  * Here are the rules for FULL_BACKREF.
7472  *
7473  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7474  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7475  *      FULL_BACKREF set.
7476  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7477  *    if it happened after the relocation occurred since we'll have dropped the
7478  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7479  *    have no real way to know for sure.
7480  *
7481  * We process the blocks one root at a time, and we start from the lowest root
7482  * objectid and go to the highest.  So we can just lookup the owner backref for
7483  * the record and if we don't find it then we know it doesn't exist and we have
7484  * a FULL BACKREF.
7485  *
7486  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7487  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7488  * be set or not and then we can check later once we've gathered all the refs.
7489  */
7490 static int calc_extent_flag(struct cache_tree *extent_cache,
7491                            struct extent_buffer *buf,
7492                            struct root_item_record *ri,
7493                            u64 *flags)
7494 {
7495         struct extent_record *rec;
7496         struct cache_extent *cache;
7497         struct tree_backref *tback;
7498         u64 owner = 0;
7499
7500         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7501         /* we have added this extent before */
7502         if (!cache)
7503                 return -ENOENT;
7504
7505         rec = container_of(cache, struct extent_record, cache);
7506
7507         /*
7508          * Except file/reloc tree, we can not have
7509          * FULL BACKREF MODE
7510          */
7511         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7512                 goto normal;
7513         /*
7514          * root node
7515          */
7516         if (buf->start == ri->bytenr)
7517                 goto normal;
7518
7519         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7520                 goto full_backref;
7521
7522         owner = btrfs_header_owner(buf);
7523         if (owner == ri->objectid)
7524                 goto normal;
7525
7526         tback = find_tree_backref(rec, 0, owner);
7527         if (!tback)
7528                 goto full_backref;
7529 normal:
7530         *flags = 0;
7531         if (rec->flag_block_full_backref != FLAG_UNSET &&
7532             rec->flag_block_full_backref != 0)
7533                 rec->bad_full_backref = 1;
7534         return 0;
7535 full_backref:
7536         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7537         if (rec->flag_block_full_backref != FLAG_UNSET &&
7538             rec->flag_block_full_backref != 1)
7539                 rec->bad_full_backref = 1;
7540         return 0;
7541 }
7542
7543 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7544 {
7545         fprintf(stderr, "Invalid key type(");
7546         print_key_type(stderr, 0, key_type);
7547         fprintf(stderr, ") found in root(");
7548         print_objectid(stderr, rootid, 0);
7549         fprintf(stderr, ")\n");
7550 }
7551
7552 /*
7553  * Check if the key is valid with its extent buffer.
7554  *
7555  * This is a early check in case invalid key exists in a extent buffer
7556  * This is not comprehensive yet, but should prevent wrong key/item passed
7557  * further
7558  */
7559 static int check_type_with_root(u64 rootid, u8 key_type)
7560 {
7561         switch (key_type) {
7562         /* Only valid in chunk tree */
7563         case BTRFS_DEV_ITEM_KEY:
7564         case BTRFS_CHUNK_ITEM_KEY:
7565                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7566                         goto err;
7567                 break;
7568         /* valid in csum and log tree */
7569         case BTRFS_CSUM_TREE_OBJECTID:
7570                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7571                       is_fstree(rootid)))
7572                         goto err;
7573                 break;
7574         case BTRFS_EXTENT_ITEM_KEY:
7575         case BTRFS_METADATA_ITEM_KEY:
7576         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7577                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7578                         goto err;
7579                 break;
7580         case BTRFS_ROOT_ITEM_KEY:
7581                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7582                         goto err;
7583                 break;
7584         case BTRFS_DEV_EXTENT_KEY:
7585                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7586                         goto err;
7587                 break;
7588         }
7589         return 0;
7590 err:
7591         report_mismatch_key_root(key_type, rootid);
7592         return -EINVAL;
7593 }
7594
7595 static int run_next_block(struct btrfs_root *root,
7596                           struct block_info *bits,
7597                           int bits_nr,
7598                           u64 *last,
7599                           struct cache_tree *pending,
7600                           struct cache_tree *seen,
7601                           struct cache_tree *reada,
7602                           struct cache_tree *nodes,
7603                           struct cache_tree *extent_cache,
7604                           struct cache_tree *chunk_cache,
7605                           struct rb_root *dev_cache,
7606                           struct block_group_tree *block_group_cache,
7607                           struct device_extent_tree *dev_extent_cache,
7608                           struct root_item_record *ri)
7609 {
7610         struct extent_buffer *buf;
7611         struct extent_record *rec = NULL;
7612         u64 bytenr;
7613         u32 size;
7614         u64 parent;
7615         u64 owner;
7616         u64 flags;
7617         u64 ptr;
7618         u64 gen = 0;
7619         int ret = 0;
7620         int i;
7621         int nritems;
7622         struct btrfs_key key;
7623         struct cache_extent *cache;
7624         int reada_bits;
7625
7626         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7627                                     bits_nr, &reada_bits);
7628         if (nritems == 0)
7629                 return 1;
7630
7631         if (!reada_bits) {
7632                 for(i = 0; i < nritems; i++) {
7633                         ret = add_cache_extent(reada, bits[i].start,
7634                                                bits[i].size);
7635                         if (ret == -EEXIST)
7636                                 continue;
7637
7638                         /* fixme, get the parent transid */
7639                         readahead_tree_block(root, bits[i].start,
7640                                              bits[i].size, 0);
7641                 }
7642         }
7643         *last = bits[0].start;
7644         bytenr = bits[0].start;
7645         size = bits[0].size;
7646
7647         cache = lookup_cache_extent(pending, bytenr, size);
7648         if (cache) {
7649                 remove_cache_extent(pending, cache);
7650                 free(cache);
7651         }
7652         cache = lookup_cache_extent(reada, bytenr, size);
7653         if (cache) {
7654                 remove_cache_extent(reada, cache);
7655                 free(cache);
7656         }
7657         cache = lookup_cache_extent(nodes, bytenr, size);
7658         if (cache) {
7659                 remove_cache_extent(nodes, cache);
7660                 free(cache);
7661         }
7662         cache = lookup_cache_extent(extent_cache, bytenr, size);
7663         if (cache) {
7664                 rec = container_of(cache, struct extent_record, cache);
7665                 gen = rec->parent_generation;
7666         }
7667
7668         /* fixme, get the real parent transid */
7669         buf = read_tree_block(root->fs_info, bytenr, size, gen);
7670         if (!extent_buffer_uptodate(buf)) {
7671                 record_bad_block_io(root->fs_info,
7672                                     extent_cache, bytenr, size);
7673                 goto out;
7674         }
7675
7676         nritems = btrfs_header_nritems(buf);
7677
7678         flags = 0;
7679         if (!init_extent_tree) {
7680                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7681                                        btrfs_header_level(buf), 1, NULL,
7682                                        &flags);
7683                 if (ret < 0) {
7684                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7685                         if (ret < 0) {
7686                                 fprintf(stderr, "Couldn't calc extent flags\n");
7687                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7688                         }
7689                 }
7690         } else {
7691                 flags = 0;
7692                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7693                 if (ret < 0) {
7694                         fprintf(stderr, "Couldn't calc extent flags\n");
7695                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7696                 }
7697         }
7698
7699         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7700                 if (ri != NULL &&
7701                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7702                     ri->objectid == btrfs_header_owner(buf)) {
7703                         /*
7704                          * Ok we got to this block from it's original owner and
7705                          * we have FULL_BACKREF set.  Relocation can leave
7706                          * converted blocks over so this is altogether possible,
7707                          * however it's not possible if the generation > the
7708                          * last snapshot, so check for this case.
7709                          */
7710                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7711                             btrfs_header_generation(buf) > ri->last_snapshot) {
7712                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7713                                 rec->bad_full_backref = 1;
7714                         }
7715                 }
7716         } else {
7717                 if (ri != NULL &&
7718                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7719                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7720                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7721                         rec->bad_full_backref = 1;
7722                 }
7723         }
7724
7725         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7726                 rec->flag_block_full_backref = 1;
7727                 parent = bytenr;
7728                 owner = 0;
7729         } else {
7730                 rec->flag_block_full_backref = 0;
7731                 parent = 0;
7732                 owner = btrfs_header_owner(buf);
7733         }
7734
7735         ret = check_block(root, extent_cache, buf, flags);
7736         if (ret)
7737                 goto out;
7738
7739         if (btrfs_is_leaf(buf)) {
7740                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7741                 for (i = 0; i < nritems; i++) {
7742                         struct btrfs_file_extent_item *fi;
7743                         btrfs_item_key_to_cpu(buf, &key, i);
7744                         /*
7745                          * Check key type against the leaf owner.
7746                          * Could filter quite a lot of early error if
7747                          * owner is correct
7748                          */
7749                         if (check_type_with_root(btrfs_header_owner(buf),
7750                                                  key.type)) {
7751                                 fprintf(stderr, "ignoring invalid key\n");
7752                                 continue;
7753                         }
7754                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7755                                 process_extent_item(root, extent_cache, buf,
7756                                                     i);
7757                                 continue;
7758                         }
7759                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7760                                 process_extent_item(root, extent_cache, buf,
7761                                                     i);
7762                                 continue;
7763                         }
7764                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7765                                 total_csum_bytes +=
7766                                         btrfs_item_size_nr(buf, i);
7767                                 continue;
7768                         }
7769                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7770                                 process_chunk_item(chunk_cache, &key, buf, i);
7771                                 continue;
7772                         }
7773                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7774                                 process_device_item(dev_cache, &key, buf, i);
7775                                 continue;
7776                         }
7777                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7778                                 process_block_group_item(block_group_cache,
7779                                         &key, buf, i);
7780                                 continue;
7781                         }
7782                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7783                                 process_device_extent_item(dev_extent_cache,
7784                                         &key, buf, i);
7785                                 continue;
7786
7787                         }
7788                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7789 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7790                                 process_extent_ref_v0(extent_cache, buf, i);
7791 #else
7792                                 BUG();
7793 #endif
7794                                 continue;
7795                         }
7796
7797                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7798                                 ret = add_tree_backref(extent_cache,
7799                                                 key.objectid, 0, key.offset, 0);
7800                                 if (ret < 0)
7801                                         error(
7802                                 "add_tree_backref failed (leaf tree block): %s",
7803                                               strerror(-ret));
7804                                 continue;
7805                         }
7806                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7807                                 ret = add_tree_backref(extent_cache,
7808                                                 key.objectid, key.offset, 0, 0);
7809                                 if (ret < 0)
7810                                         error(
7811                                 "add_tree_backref failed (leaf shared block): %s",
7812                                               strerror(-ret));
7813                                 continue;
7814                         }
7815                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7816                                 struct btrfs_extent_data_ref *ref;
7817                                 ref = btrfs_item_ptr(buf, i,
7818                                                 struct btrfs_extent_data_ref);
7819                                 add_data_backref(extent_cache,
7820                                         key.objectid, 0,
7821                                         btrfs_extent_data_ref_root(buf, ref),
7822                                         btrfs_extent_data_ref_objectid(buf,
7823                                                                        ref),
7824                                         btrfs_extent_data_ref_offset(buf, ref),
7825                                         btrfs_extent_data_ref_count(buf, ref),
7826                                         0, root->fs_info->sectorsize);
7827                                 continue;
7828                         }
7829                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7830                                 struct btrfs_shared_data_ref *ref;
7831                                 ref = btrfs_item_ptr(buf, i,
7832                                                 struct btrfs_shared_data_ref);
7833                                 add_data_backref(extent_cache,
7834                                         key.objectid, key.offset, 0, 0, 0,
7835                                         btrfs_shared_data_ref_count(buf, ref),
7836                                         0, root->fs_info->sectorsize);
7837                                 continue;
7838                         }
7839                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7840                                 struct bad_item *bad;
7841
7842                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7843                                         continue;
7844                                 if (!owner)
7845                                         continue;
7846                                 bad = malloc(sizeof(struct bad_item));
7847                                 if (!bad)
7848                                         continue;
7849                                 INIT_LIST_HEAD(&bad->list);
7850                                 memcpy(&bad->key, &key,
7851                                        sizeof(struct btrfs_key));
7852                                 bad->root_id = owner;
7853                                 list_add_tail(&bad->list, &delete_items);
7854                                 continue;
7855                         }
7856                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7857                                 continue;
7858                         fi = btrfs_item_ptr(buf, i,
7859                                             struct btrfs_file_extent_item);
7860                         if (btrfs_file_extent_type(buf, fi) ==
7861                             BTRFS_FILE_EXTENT_INLINE)
7862                                 continue;
7863                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7864                                 continue;
7865
7866                         data_bytes_allocated +=
7867                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7868                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7869                                 abort();
7870                         }
7871                         data_bytes_referenced +=
7872                                 btrfs_file_extent_num_bytes(buf, fi);
7873                         add_data_backref(extent_cache,
7874                                 btrfs_file_extent_disk_bytenr(buf, fi),
7875                                 parent, owner, key.objectid, key.offset -
7876                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7877                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7878                 }
7879         } else {
7880                 int level;
7881                 struct btrfs_key first_key;
7882
7883                 first_key.objectid = 0;
7884
7885                 if (nritems > 0)
7886                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7887                 level = btrfs_header_level(buf);
7888                 for (i = 0; i < nritems; i++) {
7889                         struct extent_record tmpl;
7890
7891                         ptr = btrfs_node_blockptr(buf, i);
7892                         size = root->fs_info->nodesize;
7893                         btrfs_node_key_to_cpu(buf, &key, i);
7894                         if (ri != NULL) {
7895                                 if ((level == ri->drop_level)
7896                                     && is_dropped_key(&key, &ri->drop_key)) {
7897                                         continue;
7898                                 }
7899                         }
7900
7901                         memset(&tmpl, 0, sizeof(tmpl));
7902                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7903                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7904                         tmpl.start = ptr;
7905                         tmpl.nr = size;
7906                         tmpl.refs = 1;
7907                         tmpl.metadata = 1;
7908                         tmpl.max_size = size;
7909                         ret = add_extent_rec(extent_cache, &tmpl);
7910                         if (ret < 0)
7911                                 goto out;
7912
7913                         ret = add_tree_backref(extent_cache, ptr, parent,
7914                                         owner, 1);
7915                         if (ret < 0) {
7916                                 error(
7917                                 "add_tree_backref failed (non-leaf block): %s",
7918                                       strerror(-ret));
7919                                 continue;
7920                         }
7921
7922                         if (level > 1) {
7923                                 add_pending(nodes, seen, ptr, size);
7924                         } else {
7925                                 add_pending(pending, seen, ptr, size);
7926                         }
7927                 }
7928                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7929                                       nritems) * sizeof(struct btrfs_key_ptr);
7930         }
7931         total_btree_bytes += buf->len;
7932         if (fs_root_objectid(btrfs_header_owner(buf)))
7933                 total_fs_tree_bytes += buf->len;
7934         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7935                 total_extent_tree_bytes += buf->len;
7936         if (!found_old_backref &&
7937             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7938             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7939             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7940                 found_old_backref = 1;
7941 out:
7942         free_extent_buffer(buf);
7943         return ret;
7944 }
7945
7946 static int add_root_to_pending(struct extent_buffer *buf,
7947                                struct cache_tree *extent_cache,
7948                                struct cache_tree *pending,
7949                                struct cache_tree *seen,
7950                                struct cache_tree *nodes,
7951                                u64 objectid)
7952 {
7953         struct extent_record tmpl;
7954         int ret;
7955
7956         if (btrfs_header_level(buf) > 0)
7957                 add_pending(nodes, seen, buf->start, buf->len);
7958         else
7959                 add_pending(pending, seen, buf->start, buf->len);
7960
7961         memset(&tmpl, 0, sizeof(tmpl));
7962         tmpl.start = buf->start;
7963         tmpl.nr = buf->len;
7964         tmpl.is_root = 1;
7965         tmpl.refs = 1;
7966         tmpl.metadata = 1;
7967         tmpl.max_size = buf->len;
7968         add_extent_rec(extent_cache, &tmpl);
7969
7970         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7971             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7972                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7973                                 0, 1);
7974         else
7975                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7976                                 1);
7977         return ret;
7978 }
7979
7980 /* as we fix the tree, we might be deleting blocks that
7981  * we're tracking for repair.  This hook makes sure we
7982  * remove any backrefs for blocks as we are fixing them.
7983  */
7984 static int free_extent_hook(struct btrfs_trans_handle *trans,
7985                             struct btrfs_root *root,
7986                             u64 bytenr, u64 num_bytes, u64 parent,
7987                             u64 root_objectid, u64 owner, u64 offset,
7988                             int refs_to_drop)
7989 {
7990         struct extent_record *rec;
7991         struct cache_extent *cache;
7992         int is_data;
7993         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7994
7995         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7996         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7997         if (!cache)
7998                 return 0;
7999
8000         rec = container_of(cache, struct extent_record, cache);
8001         if (is_data) {
8002                 struct data_backref *back;
8003                 back = find_data_backref(rec, parent, root_objectid, owner,
8004                                          offset, 1, bytenr, num_bytes);
8005                 if (!back)
8006                         goto out;
8007                 if (back->node.found_ref) {
8008                         back->found_ref -= refs_to_drop;
8009                         if (rec->refs)
8010                                 rec->refs -= refs_to_drop;
8011                 }
8012                 if (back->node.found_extent_tree) {
8013                         back->num_refs -= refs_to_drop;
8014                         if (rec->extent_item_refs)
8015                                 rec->extent_item_refs -= refs_to_drop;
8016                 }
8017                 if (back->found_ref == 0)
8018                         back->node.found_ref = 0;
8019                 if (back->num_refs == 0)
8020                         back->node.found_extent_tree = 0;
8021
8022                 if (!back->node.found_extent_tree && back->node.found_ref) {
8023                         list_del(&back->node.list);
8024                         free(back);
8025                 }
8026         } else {
8027                 struct tree_backref *back;
8028                 back = find_tree_backref(rec, parent, root_objectid);
8029                 if (!back)
8030                         goto out;
8031                 if (back->node.found_ref) {
8032                         if (rec->refs)
8033                                 rec->refs--;
8034                         back->node.found_ref = 0;
8035                 }
8036                 if (back->node.found_extent_tree) {
8037                         if (rec->extent_item_refs)
8038                                 rec->extent_item_refs--;
8039                         back->node.found_extent_tree = 0;
8040                 }
8041                 if (!back->node.found_extent_tree && back->node.found_ref) {
8042                         list_del(&back->node.list);
8043                         free(back);
8044                 }
8045         }
8046         maybe_free_extent_rec(extent_cache, rec);
8047 out:
8048         return 0;
8049 }
8050
8051 static int delete_extent_records(struct btrfs_trans_handle *trans,
8052                                  struct btrfs_root *root,
8053                                  struct btrfs_path *path,
8054                                  u64 bytenr)
8055 {
8056         struct btrfs_key key;
8057         struct btrfs_key found_key;
8058         struct extent_buffer *leaf;
8059         int ret;
8060         int slot;
8061
8062
8063         key.objectid = bytenr;
8064         key.type = (u8)-1;
8065         key.offset = (u64)-1;
8066
8067         while(1) {
8068                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8069                                         &key, path, 0, 1);
8070                 if (ret < 0)
8071                         break;
8072
8073                 if (ret > 0) {
8074                         ret = 0;
8075                         if (path->slots[0] == 0)
8076                                 break;
8077                         path->slots[0]--;
8078                 }
8079                 ret = 0;
8080
8081                 leaf = path->nodes[0];
8082                 slot = path->slots[0];
8083
8084                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8085                 if (found_key.objectid != bytenr)
8086                         break;
8087
8088                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8089                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8090                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8091                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8092                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8093                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8094                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8095                         btrfs_release_path(path);
8096                         if (found_key.type == 0) {
8097                                 if (found_key.offset == 0)
8098                                         break;
8099                                 key.offset = found_key.offset - 1;
8100                                 key.type = found_key.type;
8101                         }
8102                         key.type = found_key.type - 1;
8103                         key.offset = (u64)-1;
8104                         continue;
8105                 }
8106
8107                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8108                         found_key.objectid, found_key.type, found_key.offset);
8109
8110                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8111                 if (ret)
8112                         break;
8113                 btrfs_release_path(path);
8114
8115                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8116                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8117                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8118                                 found_key.offset : root->fs_info->nodesize;
8119
8120                         ret = btrfs_update_block_group(trans, root, bytenr,
8121                                                        bytes, 0, 0);
8122                         if (ret)
8123                                 break;
8124                 }
8125         }
8126
8127         btrfs_release_path(path);
8128         return ret;
8129 }
8130
8131 /*
8132  * for a single backref, this will allocate a new extent
8133  * and add the backref to it.
8134  */
8135 static int record_extent(struct btrfs_trans_handle *trans,
8136                          struct btrfs_fs_info *info,
8137                          struct btrfs_path *path,
8138                          struct extent_record *rec,
8139                          struct extent_backref *back,
8140                          int allocated, u64 flags)
8141 {
8142         int ret = 0;
8143         struct btrfs_root *extent_root = info->extent_root;
8144         struct extent_buffer *leaf;
8145         struct btrfs_key ins_key;
8146         struct btrfs_extent_item *ei;
8147         struct data_backref *dback;
8148         struct btrfs_tree_block_info *bi;
8149
8150         if (!back->is_data)
8151                 rec->max_size = max_t(u64, rec->max_size,
8152                                     info->nodesize);
8153
8154         if (!allocated) {
8155                 u32 item_size = sizeof(*ei);
8156
8157                 if (!back->is_data)
8158                         item_size += sizeof(*bi);
8159
8160                 ins_key.objectid = rec->start;
8161                 ins_key.offset = rec->max_size;
8162                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8163
8164                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8165                                         &ins_key, item_size);
8166                 if (ret)
8167                         goto fail;
8168
8169                 leaf = path->nodes[0];
8170                 ei = btrfs_item_ptr(leaf, path->slots[0],
8171                                     struct btrfs_extent_item);
8172
8173                 btrfs_set_extent_refs(leaf, ei, 0);
8174                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8175
8176                 if (back->is_data) {
8177                         btrfs_set_extent_flags(leaf, ei,
8178                                                BTRFS_EXTENT_FLAG_DATA);
8179                 } else {
8180                         struct btrfs_disk_key copy_key;;
8181
8182                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8183                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8184                                              sizeof(*bi));
8185
8186                         btrfs_set_disk_key_objectid(&copy_key,
8187                                                     rec->info_objectid);
8188                         btrfs_set_disk_key_type(&copy_key, 0);
8189                         btrfs_set_disk_key_offset(&copy_key, 0);
8190
8191                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8192                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8193
8194                         btrfs_set_extent_flags(leaf, ei,
8195                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8196                 }
8197
8198                 btrfs_mark_buffer_dirty(leaf);
8199                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8200                                                rec->max_size, 1, 0);
8201                 if (ret)
8202                         goto fail;
8203                 btrfs_release_path(path);
8204         }
8205
8206         if (back->is_data) {
8207                 u64 parent;
8208                 int i;
8209
8210                 dback = to_data_backref(back);
8211                 if (back->full_backref)
8212                         parent = dback->parent;
8213                 else
8214                         parent = 0;
8215
8216                 for (i = 0; i < dback->found_ref; i++) {
8217                         /* if parent != 0, we're doing a full backref
8218                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8219                          * just makes the backref allocator create a data
8220                          * backref
8221                          */
8222                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8223                                                    rec->start, rec->max_size,
8224                                                    parent,
8225                                                    dback->root,
8226                                                    parent ?
8227                                                    BTRFS_FIRST_FREE_OBJECTID :
8228                                                    dback->owner,
8229                                                    dback->offset);
8230                         if (ret)
8231                                 break;
8232                 }
8233                 fprintf(stderr, "adding new data backref"
8234                                 " on %llu %s %llu owner %llu"
8235                                 " offset %llu found %d\n",
8236                                 (unsigned long long)rec->start,
8237                                 back->full_backref ?
8238                                 "parent" : "root",
8239                                 back->full_backref ?
8240                                 (unsigned long long)parent :
8241                                 (unsigned long long)dback->root,
8242                                 (unsigned long long)dback->owner,
8243                                 (unsigned long long)dback->offset,
8244                                 dback->found_ref);
8245         } else {
8246                 u64 parent;
8247                 struct tree_backref *tback;
8248
8249                 tback = to_tree_backref(back);
8250                 if (back->full_backref)
8251                         parent = tback->parent;
8252                 else
8253                         parent = 0;
8254
8255                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8256                                            rec->start, rec->max_size,
8257                                            parent, tback->root, 0, 0);
8258                 fprintf(stderr, "adding new tree backref on "
8259                         "start %llu len %llu parent %llu root %llu\n",
8260                         rec->start, rec->max_size, parent, tback->root);
8261         }
8262 fail:
8263         btrfs_release_path(path);
8264         return ret;
8265 }
8266
8267 static struct extent_entry *find_entry(struct list_head *entries,
8268                                        u64 bytenr, u64 bytes)
8269 {
8270         struct extent_entry *entry = NULL;
8271
8272         list_for_each_entry(entry, entries, list) {
8273                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8274                         return entry;
8275         }
8276
8277         return NULL;
8278 }
8279
8280 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8281 {
8282         struct extent_entry *entry, *best = NULL, *prev = NULL;
8283
8284         list_for_each_entry(entry, entries, list) {
8285                 /*
8286                  * If there are as many broken entries as entries then we know
8287                  * not to trust this particular entry.
8288                  */
8289                 if (entry->broken == entry->count)
8290                         continue;
8291
8292                 /*
8293                  * Special case, when there are only two entries and 'best' is
8294                  * the first one
8295                  */
8296                 if (!prev) {
8297                         best = entry;
8298                         prev = entry;
8299                         continue;
8300                 }
8301
8302                 /*
8303                  * If our current entry == best then we can't be sure our best
8304                  * is really the best, so we need to keep searching.
8305                  */
8306                 if (best && best->count == entry->count) {
8307                         prev = entry;
8308                         best = NULL;
8309                         continue;
8310                 }
8311
8312                 /* Prev == entry, not good enough, have to keep searching */
8313                 if (!prev->broken && prev->count == entry->count)
8314                         continue;
8315
8316                 if (!best)
8317                         best = (prev->count > entry->count) ? prev : entry;
8318                 else if (best->count < entry->count)
8319                         best = entry;
8320                 prev = entry;
8321         }
8322
8323         return best;
8324 }
8325
8326 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8327                       struct data_backref *dback, struct extent_entry *entry)
8328 {
8329         struct btrfs_trans_handle *trans;
8330         struct btrfs_root *root;
8331         struct btrfs_file_extent_item *fi;
8332         struct extent_buffer *leaf;
8333         struct btrfs_key key;
8334         u64 bytenr, bytes;
8335         int ret, err;
8336
8337         key.objectid = dback->root;
8338         key.type = BTRFS_ROOT_ITEM_KEY;
8339         key.offset = (u64)-1;
8340         root = btrfs_read_fs_root(info, &key);
8341         if (IS_ERR(root)) {
8342                 fprintf(stderr, "Couldn't find root for our ref\n");
8343                 return -EINVAL;
8344         }
8345
8346         /*
8347          * The backref points to the original offset of the extent if it was
8348          * split, so we need to search down to the offset we have and then walk
8349          * forward until we find the backref we're looking for.
8350          */
8351         key.objectid = dback->owner;
8352         key.type = BTRFS_EXTENT_DATA_KEY;
8353         key.offset = dback->offset;
8354         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8355         if (ret < 0) {
8356                 fprintf(stderr, "Error looking up ref %d\n", ret);
8357                 return ret;
8358         }
8359
8360         while (1) {
8361                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8362                         ret = btrfs_next_leaf(root, path);
8363                         if (ret) {
8364                                 fprintf(stderr, "Couldn't find our ref, next\n");
8365                                 return -EINVAL;
8366                         }
8367                 }
8368                 leaf = path->nodes[0];
8369                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8370                 if (key.objectid != dback->owner ||
8371                     key.type != BTRFS_EXTENT_DATA_KEY) {
8372                         fprintf(stderr, "Couldn't find our ref, search\n");
8373                         return -EINVAL;
8374                 }
8375                 fi = btrfs_item_ptr(leaf, path->slots[0],
8376                                     struct btrfs_file_extent_item);
8377                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8378                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8379
8380                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8381                         break;
8382                 path->slots[0]++;
8383         }
8384
8385         btrfs_release_path(path);
8386
8387         trans = btrfs_start_transaction(root, 1);
8388         if (IS_ERR(trans))
8389                 return PTR_ERR(trans);
8390
8391         /*
8392          * Ok we have the key of the file extent we want to fix, now we can cow
8393          * down to the thing and fix it.
8394          */
8395         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8396         if (ret < 0) {
8397                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8398                         key.objectid, key.type, key.offset, ret);
8399                 goto out;
8400         }
8401         if (ret > 0) {
8402                 fprintf(stderr, "Well that's odd, we just found this key "
8403                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8404                         key.offset);
8405                 ret = -EINVAL;
8406                 goto out;
8407         }
8408         leaf = path->nodes[0];
8409         fi = btrfs_item_ptr(leaf, path->slots[0],
8410                             struct btrfs_file_extent_item);
8411
8412         if (btrfs_file_extent_compression(leaf, fi) &&
8413             dback->disk_bytenr != entry->bytenr) {
8414                 fprintf(stderr, "Ref doesn't match the record start and is "
8415                         "compressed, please take a btrfs-image of this file "
8416                         "system and send it to a btrfs developer so they can "
8417                         "complete this functionality for bytenr %Lu\n",
8418                         dback->disk_bytenr);
8419                 ret = -EINVAL;
8420                 goto out;
8421         }
8422
8423         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8424                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8425         } else if (dback->disk_bytenr > entry->bytenr) {
8426                 u64 off_diff, offset;
8427
8428                 off_diff = dback->disk_bytenr - entry->bytenr;
8429                 offset = btrfs_file_extent_offset(leaf, fi);
8430                 if (dback->disk_bytenr + offset +
8431                     btrfs_file_extent_num_bytes(leaf, fi) >
8432                     entry->bytenr + entry->bytes) {
8433                         fprintf(stderr, "Ref is past the entry end, please "
8434                                 "take a btrfs-image of this file system and "
8435                                 "send it to a btrfs developer, ref %Lu\n",
8436                                 dback->disk_bytenr);
8437                         ret = -EINVAL;
8438                         goto out;
8439                 }
8440                 offset += off_diff;
8441                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8442                 btrfs_set_file_extent_offset(leaf, fi, offset);
8443         } else if (dback->disk_bytenr < entry->bytenr) {
8444                 u64 offset;
8445
8446                 offset = btrfs_file_extent_offset(leaf, fi);
8447                 if (dback->disk_bytenr + offset < entry->bytenr) {
8448                         fprintf(stderr, "Ref is before the entry start, please"
8449                                 " take a btrfs-image of this file system and "
8450                                 "send it to a btrfs developer, ref %Lu\n",
8451                                 dback->disk_bytenr);
8452                         ret = -EINVAL;
8453                         goto out;
8454                 }
8455
8456                 offset += dback->disk_bytenr;
8457                 offset -= entry->bytenr;
8458                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8459                 btrfs_set_file_extent_offset(leaf, fi, offset);
8460         }
8461
8462         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8463
8464         /*
8465          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8466          * only do this if we aren't using compression, otherwise it's a
8467          * trickier case.
8468          */
8469         if (!btrfs_file_extent_compression(leaf, fi))
8470                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8471         else
8472                 printf("ram bytes may be wrong?\n");
8473         btrfs_mark_buffer_dirty(leaf);
8474 out:
8475         err = btrfs_commit_transaction(trans, root);
8476         btrfs_release_path(path);
8477         return ret ? ret : err;
8478 }
8479
8480 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8481                            struct extent_record *rec)
8482 {
8483         struct extent_backref *back;
8484         struct data_backref *dback;
8485         struct extent_entry *entry, *best = NULL;
8486         LIST_HEAD(entries);
8487         int nr_entries = 0;
8488         int broken_entries = 0;
8489         int ret = 0;
8490         short mismatch = 0;
8491
8492         /*
8493          * Metadata is easy and the backrefs should always agree on bytenr and
8494          * size, if not we've got bigger issues.
8495          */
8496         if (rec->metadata)
8497                 return 0;
8498
8499         list_for_each_entry(back, &rec->backrefs, list) {
8500                 if (back->full_backref || !back->is_data)
8501                         continue;
8502
8503                 dback = to_data_backref(back);
8504
8505                 /*
8506                  * We only pay attention to backrefs that we found a real
8507                  * backref for.
8508                  */
8509                 if (dback->found_ref == 0)
8510                         continue;
8511
8512                 /*
8513                  * For now we only catch when the bytes don't match, not the
8514                  * bytenr.  We can easily do this at the same time, but I want
8515                  * to have a fs image to test on before we just add repair
8516                  * functionality willy-nilly so we know we won't screw up the
8517                  * repair.
8518                  */
8519
8520                 entry = find_entry(&entries, dback->disk_bytenr,
8521                                    dback->bytes);
8522                 if (!entry) {
8523                         entry = malloc(sizeof(struct extent_entry));
8524                         if (!entry) {
8525                                 ret = -ENOMEM;
8526                                 goto out;
8527                         }
8528                         memset(entry, 0, sizeof(*entry));
8529                         entry->bytenr = dback->disk_bytenr;
8530                         entry->bytes = dback->bytes;
8531                         list_add_tail(&entry->list, &entries);
8532                         nr_entries++;
8533                 }
8534
8535                 /*
8536                  * If we only have on entry we may think the entries agree when
8537                  * in reality they don't so we have to do some extra checking.
8538                  */
8539                 if (dback->disk_bytenr != rec->start ||
8540                     dback->bytes != rec->nr || back->broken)
8541                         mismatch = 1;
8542
8543                 if (back->broken) {
8544                         entry->broken++;
8545                         broken_entries++;
8546                 }
8547
8548                 entry->count++;
8549         }
8550
8551         /* Yay all the backrefs agree, carry on good sir */
8552         if (nr_entries <= 1 && !mismatch)
8553                 goto out;
8554
8555         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8556                 "%Lu\n", rec->start);
8557
8558         /*
8559          * First we want to see if the backrefs can agree amongst themselves who
8560          * is right, so figure out which one of the entries has the highest
8561          * count.
8562          */
8563         best = find_most_right_entry(&entries);
8564
8565         /*
8566          * Ok so we may have an even split between what the backrefs think, so
8567          * this is where we use the extent ref to see what it thinks.
8568          */
8569         if (!best) {
8570                 entry = find_entry(&entries, rec->start, rec->nr);
8571                 if (!entry && (!broken_entries || !rec->found_rec)) {
8572                         fprintf(stderr, "Backrefs don't agree with each other "
8573                                 "and extent record doesn't agree with anybody,"
8574                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8575                                 rec->start, rec->nr);
8576                         ret = -EINVAL;
8577                         goto out;
8578                 } else if (!entry) {
8579                         /*
8580                          * Ok our backrefs were broken, we'll assume this is the
8581                          * correct value and add an entry for this range.
8582                          */
8583                         entry = malloc(sizeof(struct extent_entry));
8584                         if (!entry) {
8585                                 ret = -ENOMEM;
8586                                 goto out;
8587                         }
8588                         memset(entry, 0, sizeof(*entry));
8589                         entry->bytenr = rec->start;
8590                         entry->bytes = rec->nr;
8591                         list_add_tail(&entry->list, &entries);
8592                         nr_entries++;
8593                 }
8594                 entry->count++;
8595                 best = find_most_right_entry(&entries);
8596                 if (!best) {
8597                         fprintf(stderr, "Backrefs and extent record evenly "
8598                                 "split on who is right, this is going to "
8599                                 "require user input to fix bytenr %Lu bytes "
8600                                 "%Lu\n", rec->start, rec->nr);
8601                         ret = -EINVAL;
8602                         goto out;
8603                 }
8604         }
8605
8606         /*
8607          * I don't think this can happen currently as we'll abort() if we catch
8608          * this case higher up, but in case somebody removes that we still can't
8609          * deal with it properly here yet, so just bail out of that's the case.
8610          */
8611         if (best->bytenr != rec->start) {
8612                 fprintf(stderr, "Extent start and backref starts don't match, "
8613                         "please use btrfs-image on this file system and send "
8614                         "it to a btrfs developer so they can make fsck fix "
8615                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8616                         rec->start, rec->nr);
8617                 ret = -EINVAL;
8618                 goto out;
8619         }
8620
8621         /*
8622          * Ok great we all agreed on an extent record, let's go find the real
8623          * references and fix up the ones that don't match.
8624          */
8625         list_for_each_entry(back, &rec->backrefs, list) {
8626                 if (back->full_backref || !back->is_data)
8627                         continue;
8628
8629                 dback = to_data_backref(back);
8630
8631                 /*
8632                  * Still ignoring backrefs that don't have a real ref attached
8633                  * to them.
8634                  */
8635                 if (dback->found_ref == 0)
8636                         continue;
8637
8638                 if (dback->bytes == best->bytes &&
8639                     dback->disk_bytenr == best->bytenr)
8640                         continue;
8641
8642                 ret = repair_ref(info, path, dback, best);
8643                 if (ret)
8644                         goto out;
8645         }
8646
8647         /*
8648          * Ok we messed with the actual refs, which means we need to drop our
8649          * entire cache and go back and rescan.  I know this is a huge pain and
8650          * adds a lot of extra work, but it's the only way to be safe.  Once all
8651          * the backrefs agree we may not need to do anything to the extent
8652          * record itself.
8653          */
8654         ret = -EAGAIN;
8655 out:
8656         while (!list_empty(&entries)) {
8657                 entry = list_entry(entries.next, struct extent_entry, list);
8658                 list_del_init(&entry->list);
8659                 free(entry);
8660         }
8661         return ret;
8662 }
8663
8664 static int process_duplicates(struct cache_tree *extent_cache,
8665                               struct extent_record *rec)
8666 {
8667         struct extent_record *good, *tmp;
8668         struct cache_extent *cache;
8669         int ret;
8670
8671         /*
8672          * If we found a extent record for this extent then return, or if we
8673          * have more than one duplicate we are likely going to need to delete
8674          * something.
8675          */
8676         if (rec->found_rec || rec->num_duplicates > 1)
8677                 return 0;
8678
8679         /* Shouldn't happen but just in case */
8680         BUG_ON(!rec->num_duplicates);
8681
8682         /*
8683          * So this happens if we end up with a backref that doesn't match the
8684          * actual extent entry.  So either the backref is bad or the extent
8685          * entry is bad.  Either way we want to have the extent_record actually
8686          * reflect what we found in the extent_tree, so we need to take the
8687          * duplicate out and use that as the extent_record since the only way we
8688          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8689          */
8690         remove_cache_extent(extent_cache, &rec->cache);
8691
8692         good = to_extent_record(rec->dups.next);
8693         list_del_init(&good->list);
8694         INIT_LIST_HEAD(&good->backrefs);
8695         INIT_LIST_HEAD(&good->dups);
8696         good->cache.start = good->start;
8697         good->cache.size = good->nr;
8698         good->content_checked = 0;
8699         good->owner_ref_checked = 0;
8700         good->num_duplicates = 0;
8701         good->refs = rec->refs;
8702         list_splice_init(&rec->backrefs, &good->backrefs);
8703         while (1) {
8704                 cache = lookup_cache_extent(extent_cache, good->start,
8705                                             good->nr);
8706                 if (!cache)
8707                         break;
8708                 tmp = container_of(cache, struct extent_record, cache);
8709
8710                 /*
8711                  * If we find another overlapping extent and it's found_rec is
8712                  * set then it's a duplicate and we need to try and delete
8713                  * something.
8714                  */
8715                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8716                         if (list_empty(&good->list))
8717                                 list_add_tail(&good->list,
8718                                               &duplicate_extents);
8719                         good->num_duplicates += tmp->num_duplicates + 1;
8720                         list_splice_init(&tmp->dups, &good->dups);
8721                         list_del_init(&tmp->list);
8722                         list_add_tail(&tmp->list, &good->dups);
8723                         remove_cache_extent(extent_cache, &tmp->cache);
8724                         continue;
8725                 }
8726
8727                 /*
8728                  * Ok we have another non extent item backed extent rec, so lets
8729                  * just add it to this extent and carry on like we did above.
8730                  */
8731                 good->refs += tmp->refs;
8732                 list_splice_init(&tmp->backrefs, &good->backrefs);
8733                 remove_cache_extent(extent_cache, &tmp->cache);
8734                 free(tmp);
8735         }
8736         ret = insert_cache_extent(extent_cache, &good->cache);
8737         BUG_ON(ret);
8738         free(rec);
8739         return good->num_duplicates ? 0 : 1;
8740 }
8741
8742 static int delete_duplicate_records(struct btrfs_root *root,
8743                                     struct extent_record *rec)
8744 {
8745         struct btrfs_trans_handle *trans;
8746         LIST_HEAD(delete_list);
8747         struct btrfs_path path;
8748         struct extent_record *tmp, *good, *n;
8749         int nr_del = 0;
8750         int ret = 0, err;
8751         struct btrfs_key key;
8752
8753         btrfs_init_path(&path);
8754
8755         good = rec;
8756         /* Find the record that covers all of the duplicates. */
8757         list_for_each_entry(tmp, &rec->dups, list) {
8758                 if (good->start < tmp->start)
8759                         continue;
8760                 if (good->nr > tmp->nr)
8761                         continue;
8762
8763                 if (tmp->start + tmp->nr < good->start + good->nr) {
8764                         fprintf(stderr, "Ok we have overlapping extents that "
8765                                 "aren't completely covered by each other, this "
8766                                 "is going to require more careful thought.  "
8767                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8768                                 tmp->start, tmp->nr, good->start, good->nr);
8769                         abort();
8770                 }
8771                 good = tmp;
8772         }
8773
8774         if (good != rec)
8775                 list_add_tail(&rec->list, &delete_list);
8776
8777         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8778                 if (tmp == good)
8779                         continue;
8780                 list_move_tail(&tmp->list, &delete_list);
8781         }
8782
8783         root = root->fs_info->extent_root;
8784         trans = btrfs_start_transaction(root, 1);
8785         if (IS_ERR(trans)) {
8786                 ret = PTR_ERR(trans);
8787                 goto out;
8788         }
8789
8790         list_for_each_entry(tmp, &delete_list, list) {
8791                 if (tmp->found_rec == 0)
8792                         continue;
8793                 key.objectid = tmp->start;
8794                 key.type = BTRFS_EXTENT_ITEM_KEY;
8795                 key.offset = tmp->nr;
8796
8797                 /* Shouldn't happen but just in case */
8798                 if (tmp->metadata) {
8799                         fprintf(stderr, "Well this shouldn't happen, extent "
8800                                 "record overlaps but is metadata? "
8801                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8802                         abort();
8803                 }
8804
8805                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8806                 if (ret) {
8807                         if (ret > 0)
8808                                 ret = -EINVAL;
8809                         break;
8810                 }
8811                 ret = btrfs_del_item(trans, root, &path);
8812                 if (ret)
8813                         break;
8814                 btrfs_release_path(&path);
8815                 nr_del++;
8816         }
8817         err = btrfs_commit_transaction(trans, root);
8818         if (err && !ret)
8819                 ret = err;
8820 out:
8821         while (!list_empty(&delete_list)) {
8822                 tmp = to_extent_record(delete_list.next);
8823                 list_del_init(&tmp->list);
8824                 if (tmp == rec)
8825                         continue;
8826                 free(tmp);
8827         }
8828
8829         while (!list_empty(&rec->dups)) {
8830                 tmp = to_extent_record(rec->dups.next);
8831                 list_del_init(&tmp->list);
8832                 free(tmp);
8833         }
8834
8835         btrfs_release_path(&path);
8836
8837         if (!ret && !nr_del)
8838                 rec->num_duplicates = 0;
8839
8840         return ret ? ret : nr_del;
8841 }
8842
8843 static int find_possible_backrefs(struct btrfs_fs_info *info,
8844                                   struct btrfs_path *path,
8845                                   struct cache_tree *extent_cache,
8846                                   struct extent_record *rec)
8847 {
8848         struct btrfs_root *root;
8849         struct extent_backref *back;
8850         struct data_backref *dback;
8851         struct cache_extent *cache;
8852         struct btrfs_file_extent_item *fi;
8853         struct btrfs_key key;
8854         u64 bytenr, bytes;
8855         int ret;
8856
8857         list_for_each_entry(back, &rec->backrefs, list) {
8858                 /* Don't care about full backrefs (poor unloved backrefs) */
8859                 if (back->full_backref || !back->is_data)
8860                         continue;
8861
8862                 dback = to_data_backref(back);
8863
8864                 /* We found this one, we don't need to do a lookup */
8865                 if (dback->found_ref)
8866                         continue;
8867
8868                 key.objectid = dback->root;
8869                 key.type = BTRFS_ROOT_ITEM_KEY;
8870                 key.offset = (u64)-1;
8871
8872                 root = btrfs_read_fs_root(info, &key);
8873
8874                 /* No root, definitely a bad ref, skip */
8875                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8876                         continue;
8877                 /* Other err, exit */
8878                 if (IS_ERR(root))
8879                         return PTR_ERR(root);
8880
8881                 key.objectid = dback->owner;
8882                 key.type = BTRFS_EXTENT_DATA_KEY;
8883                 key.offset = dback->offset;
8884                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8885                 if (ret) {
8886                         btrfs_release_path(path);
8887                         if (ret < 0)
8888                                 return ret;
8889                         /* Didn't find it, we can carry on */
8890                         ret = 0;
8891                         continue;
8892                 }
8893
8894                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8895                                     struct btrfs_file_extent_item);
8896                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8897                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8898                 btrfs_release_path(path);
8899                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8900                 if (cache) {
8901                         struct extent_record *tmp;
8902                         tmp = container_of(cache, struct extent_record, cache);
8903
8904                         /*
8905                          * If we found an extent record for the bytenr for this
8906                          * particular backref then we can't add it to our
8907                          * current extent record.  We only want to add backrefs
8908                          * that don't have a corresponding extent item in the
8909                          * extent tree since they likely belong to this record
8910                          * and we need to fix it if it doesn't match bytenrs.
8911                          */
8912                         if  (tmp->found_rec)
8913                                 continue;
8914                 }
8915
8916                 dback->found_ref += 1;
8917                 dback->disk_bytenr = bytenr;
8918                 dback->bytes = bytes;
8919
8920                 /*
8921                  * Set this so the verify backref code knows not to trust the
8922                  * values in this backref.
8923                  */
8924                 back->broken = 1;
8925         }
8926
8927         return 0;
8928 }
8929
8930 /*
8931  * Record orphan data ref into corresponding root.
8932  *
8933  * Return 0 if the extent item contains data ref and recorded.
8934  * Return 1 if the extent item contains no useful data ref
8935  *   On that case, it may contains only shared_dataref or metadata backref
8936  *   or the file extent exists(this should be handled by the extent bytenr
8937  *   recovery routine)
8938  * Return <0 if something goes wrong.
8939  */
8940 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8941                                       struct extent_record *rec)
8942 {
8943         struct btrfs_key key;
8944         struct btrfs_root *dest_root;
8945         struct extent_backref *back;
8946         struct data_backref *dback;
8947         struct orphan_data_extent *orphan;
8948         struct btrfs_path path;
8949         int recorded_data_ref = 0;
8950         int ret = 0;
8951
8952         if (rec->metadata)
8953                 return 1;
8954         btrfs_init_path(&path);
8955         list_for_each_entry(back, &rec->backrefs, list) {
8956                 if (back->full_backref || !back->is_data ||
8957                     !back->found_extent_tree)
8958                         continue;
8959                 dback = to_data_backref(back);
8960                 if (dback->found_ref)
8961                         continue;
8962                 key.objectid = dback->root;
8963                 key.type = BTRFS_ROOT_ITEM_KEY;
8964                 key.offset = (u64)-1;
8965
8966                 dest_root = btrfs_read_fs_root(fs_info, &key);
8967
8968                 /* For non-exist root we just skip it */
8969                 if (IS_ERR(dest_root) || !dest_root)
8970                         continue;
8971
8972                 key.objectid = dback->owner;
8973                 key.type = BTRFS_EXTENT_DATA_KEY;
8974                 key.offset = dback->offset;
8975
8976                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8977                 btrfs_release_path(&path);
8978                 /*
8979                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8980                  * we need to record it for inode/file extent rebuild.
8981                  * For ret > 0, we record it only for file extent rebuild.
8982                  * For ret == 0, the file extent exists but only bytenr
8983                  * mismatch, let the original bytenr fix routine to handle,
8984                  * don't record it.
8985                  */
8986                 if (ret == 0)
8987                         continue;
8988                 ret = 0;
8989                 orphan = malloc(sizeof(*orphan));
8990                 if (!orphan) {
8991                         ret = -ENOMEM;
8992                         goto out;
8993                 }
8994                 INIT_LIST_HEAD(&orphan->list);
8995                 orphan->root = dback->root;
8996                 orphan->objectid = dback->owner;
8997                 orphan->offset = dback->offset;
8998                 orphan->disk_bytenr = rec->cache.start;
8999                 orphan->disk_len = rec->cache.size;
9000                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9001                 recorded_data_ref = 1;
9002         }
9003 out:
9004         btrfs_release_path(&path);
9005         if (!ret)
9006                 return !recorded_data_ref;
9007         else
9008                 return ret;
9009 }
9010
9011 /*
9012  * when an incorrect extent item is found, this will delete
9013  * all of the existing entries for it and recreate them
9014  * based on what the tree scan found.
9015  */
9016 static int fixup_extent_refs(struct btrfs_fs_info *info,
9017                              struct cache_tree *extent_cache,
9018                              struct extent_record *rec)
9019 {
9020         struct btrfs_trans_handle *trans = NULL;
9021         int ret;
9022         struct btrfs_path path;
9023         struct list_head *cur = rec->backrefs.next;
9024         struct cache_extent *cache;
9025         struct extent_backref *back;
9026         int allocated = 0;
9027         u64 flags = 0;
9028
9029         if (rec->flag_block_full_backref)
9030                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9031
9032         btrfs_init_path(&path);
9033         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9034                 /*
9035                  * Sometimes the backrefs themselves are so broken they don't
9036                  * get attached to any meaningful rec, so first go back and
9037                  * check any of our backrefs that we couldn't find and throw
9038                  * them into the list if we find the backref so that
9039                  * verify_backrefs can figure out what to do.
9040                  */
9041                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9042                 if (ret < 0)
9043                         goto out;
9044         }
9045
9046         /* step one, make sure all of the backrefs agree */
9047         ret = verify_backrefs(info, &path, rec);
9048         if (ret < 0)
9049                 goto out;
9050
9051         trans = btrfs_start_transaction(info->extent_root, 1);
9052         if (IS_ERR(trans)) {
9053                 ret = PTR_ERR(trans);
9054                 goto out;
9055         }
9056
9057         /* step two, delete all the existing records */
9058         ret = delete_extent_records(trans, info->extent_root, &path,
9059                                     rec->start);
9060
9061         if (ret < 0)
9062                 goto out;
9063
9064         /* was this block corrupt?  If so, don't add references to it */
9065         cache = lookup_cache_extent(info->corrupt_blocks,
9066                                     rec->start, rec->max_size);
9067         if (cache) {
9068                 ret = 0;
9069                 goto out;
9070         }
9071
9072         /* step three, recreate all the refs we did find */
9073         while(cur != &rec->backrefs) {
9074                 back = to_extent_backref(cur);
9075                 cur = cur->next;
9076
9077                 /*
9078                  * if we didn't find any references, don't create a
9079                  * new extent record
9080                  */
9081                 if (!back->found_ref)
9082                         continue;
9083
9084                 rec->bad_full_backref = 0;
9085                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9086                 allocated = 1;
9087
9088                 if (ret)
9089                         goto out;
9090         }
9091 out:
9092         if (trans) {
9093                 int err = btrfs_commit_transaction(trans, info->extent_root);
9094                 if (!ret)
9095                         ret = err;
9096         }
9097
9098         if (!ret)
9099                 fprintf(stderr, "Repaired extent references for %llu\n",
9100                                 (unsigned long long)rec->start);
9101
9102         btrfs_release_path(&path);
9103         return ret;
9104 }
9105
9106 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9107                               struct extent_record *rec)
9108 {
9109         struct btrfs_trans_handle *trans;
9110         struct btrfs_root *root = fs_info->extent_root;
9111         struct btrfs_path path;
9112         struct btrfs_extent_item *ei;
9113         struct btrfs_key key;
9114         u64 flags;
9115         int ret = 0;
9116
9117         key.objectid = rec->start;
9118         if (rec->metadata) {
9119                 key.type = BTRFS_METADATA_ITEM_KEY;
9120                 key.offset = rec->info_level;
9121         } else {
9122                 key.type = BTRFS_EXTENT_ITEM_KEY;
9123                 key.offset = rec->max_size;
9124         }
9125
9126         trans = btrfs_start_transaction(root, 0);
9127         if (IS_ERR(trans))
9128                 return PTR_ERR(trans);
9129
9130         btrfs_init_path(&path);
9131         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9132         if (ret < 0) {
9133                 btrfs_release_path(&path);
9134                 btrfs_commit_transaction(trans, root);
9135                 return ret;
9136         } else if (ret) {
9137                 fprintf(stderr, "Didn't find extent for %llu\n",
9138                         (unsigned long long)rec->start);
9139                 btrfs_release_path(&path);
9140                 btrfs_commit_transaction(trans, root);
9141                 return -ENOENT;
9142         }
9143
9144         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9145                             struct btrfs_extent_item);
9146         flags = btrfs_extent_flags(path.nodes[0], ei);
9147         if (rec->flag_block_full_backref) {
9148                 fprintf(stderr, "setting full backref on %llu\n",
9149                         (unsigned long long)key.objectid);
9150                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9151         } else {
9152                 fprintf(stderr, "clearing full backref on %llu\n",
9153                         (unsigned long long)key.objectid);
9154                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9155         }
9156         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9157         btrfs_mark_buffer_dirty(path.nodes[0]);
9158         btrfs_release_path(&path);
9159         ret = btrfs_commit_transaction(trans, root);
9160         if (!ret)
9161                 fprintf(stderr, "Repaired extent flags for %llu\n",
9162                                 (unsigned long long)rec->start);
9163
9164         return ret;
9165 }
9166
9167 /* right now we only prune from the extent allocation tree */
9168 static int prune_one_block(struct btrfs_trans_handle *trans,
9169                            struct btrfs_fs_info *info,
9170                            struct btrfs_corrupt_block *corrupt)
9171 {
9172         int ret;
9173         struct btrfs_path path;
9174         struct extent_buffer *eb;
9175         u64 found;
9176         int slot;
9177         int nritems;
9178         int level = corrupt->level + 1;
9179
9180         btrfs_init_path(&path);
9181 again:
9182         /* we want to stop at the parent to our busted block */
9183         path.lowest_level = level;
9184
9185         ret = btrfs_search_slot(trans, info->extent_root,
9186                                 &corrupt->key, &path, -1, 1);
9187
9188         if (ret < 0)
9189                 goto out;
9190
9191         eb = path.nodes[level];
9192         if (!eb) {
9193                 ret = -ENOENT;
9194                 goto out;
9195         }
9196
9197         /*
9198          * hopefully the search gave us the block we want to prune,
9199          * lets try that first
9200          */
9201         slot = path.slots[level];
9202         found =  btrfs_node_blockptr(eb, slot);
9203         if (found == corrupt->cache.start)
9204                 goto del_ptr;
9205
9206         nritems = btrfs_header_nritems(eb);
9207
9208         /* the search failed, lets scan this node and hope we find it */
9209         for (slot = 0; slot < nritems; slot++) {
9210                 found =  btrfs_node_blockptr(eb, slot);
9211                 if (found == corrupt->cache.start)
9212                         goto del_ptr;
9213         }
9214         /*
9215          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9216          * to this block
9217          */
9218         if (eb == info->extent_root->node) {
9219                 ret = -ENOENT;
9220                 goto out;
9221         } else {
9222                 level++;
9223                 btrfs_release_path(&path);
9224                 goto again;
9225         }
9226
9227 del_ptr:
9228         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9229         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9230
9231 out:
9232         btrfs_release_path(&path);
9233         return ret;
9234 }
9235
9236 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9237 {
9238         struct btrfs_trans_handle *trans = NULL;
9239         struct cache_extent *cache;
9240         struct btrfs_corrupt_block *corrupt;
9241
9242         while (1) {
9243                 cache = search_cache_extent(info->corrupt_blocks, 0);
9244                 if (!cache)
9245                         break;
9246                 if (!trans) {
9247                         trans = btrfs_start_transaction(info->extent_root, 1);
9248                         if (IS_ERR(trans))
9249                                 return PTR_ERR(trans);
9250                 }
9251                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9252                 prune_one_block(trans, info, corrupt);
9253                 remove_cache_extent(info->corrupt_blocks, cache);
9254         }
9255         if (trans)
9256                 return btrfs_commit_transaction(trans, info->extent_root);
9257         return 0;
9258 }
9259
9260 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9261 {
9262         struct btrfs_block_group_cache *cache;
9263         u64 start, end;
9264         int ret;
9265
9266         while (1) {
9267                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9268                                             &start, &end, EXTENT_DIRTY);
9269                 if (ret)
9270                         break;
9271                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9272         }
9273
9274         start = 0;
9275         while (1) {
9276                 cache = btrfs_lookup_first_block_group(fs_info, start);
9277                 if (!cache)
9278                         break;
9279                 if (cache->cached)
9280                         cache->cached = 0;
9281                 start = cache->key.objectid + cache->key.offset;
9282         }
9283 }
9284
9285 static int check_extent_refs(struct btrfs_root *root,
9286                              struct cache_tree *extent_cache)
9287 {
9288         struct extent_record *rec;
9289         struct cache_extent *cache;
9290         int ret = 0;
9291         int had_dups = 0;
9292
9293         if (repair) {
9294                 /*
9295                  * if we're doing a repair, we have to make sure
9296                  * we don't allocate from the problem extents.
9297                  * In the worst case, this will be all the
9298                  * extents in the FS
9299                  */
9300                 cache = search_cache_extent(extent_cache, 0);
9301                 while(cache) {
9302                         rec = container_of(cache, struct extent_record, cache);
9303                         set_extent_dirty(root->fs_info->excluded_extents,
9304                                          rec->start,
9305                                          rec->start + rec->max_size - 1);
9306                         cache = next_cache_extent(cache);
9307                 }
9308
9309                 /* pin down all the corrupted blocks too */
9310                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9311                 while(cache) {
9312                         set_extent_dirty(root->fs_info->excluded_extents,
9313                                          cache->start,
9314                                          cache->start + cache->size - 1);
9315                         cache = next_cache_extent(cache);
9316                 }
9317                 prune_corrupt_blocks(root->fs_info);
9318                 reset_cached_block_groups(root->fs_info);
9319         }
9320
9321         reset_cached_block_groups(root->fs_info);
9322
9323         /*
9324          * We need to delete any duplicate entries we find first otherwise we
9325          * could mess up the extent tree when we have backrefs that actually
9326          * belong to a different extent item and not the weird duplicate one.
9327          */
9328         while (repair && !list_empty(&duplicate_extents)) {
9329                 rec = to_extent_record(duplicate_extents.next);
9330                 list_del_init(&rec->list);
9331
9332                 /* Sometimes we can find a backref before we find an actual
9333                  * extent, so we need to process it a little bit to see if there
9334                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9335                  * if this is a backref screwup.  If we need to delete stuff
9336                  * process_duplicates() will return 0, otherwise it will return
9337                  * 1 and we
9338                  */
9339                 if (process_duplicates(extent_cache, rec))
9340                         continue;
9341                 ret = delete_duplicate_records(root, rec);
9342                 if (ret < 0)
9343                         return ret;
9344                 /*
9345                  * delete_duplicate_records will return the number of entries
9346                  * deleted, so if it's greater than 0 then we know we actually
9347                  * did something and we need to remove.
9348                  */
9349                 if (ret)
9350                         had_dups = 1;
9351         }
9352
9353         if (had_dups)
9354                 return -EAGAIN;
9355
9356         while(1) {
9357                 int cur_err = 0;
9358                 int fix = 0;
9359
9360                 cache = search_cache_extent(extent_cache, 0);
9361                 if (!cache)
9362                         break;
9363                 rec = container_of(cache, struct extent_record, cache);
9364                 if (rec->num_duplicates) {
9365                         fprintf(stderr, "extent item %llu has multiple extent "
9366                                 "items\n", (unsigned long long)rec->start);
9367                         cur_err = 1;
9368                 }
9369
9370                 if (rec->refs != rec->extent_item_refs) {
9371                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9372                                 (unsigned long long)rec->start,
9373                                 (unsigned long long)rec->nr);
9374                         fprintf(stderr, "extent item %llu, found %llu\n",
9375                                 (unsigned long long)rec->extent_item_refs,
9376                                 (unsigned long long)rec->refs);
9377                         ret = record_orphan_data_extents(root->fs_info, rec);
9378                         if (ret < 0)
9379                                 goto repair_abort;
9380                         fix = ret;
9381                         cur_err = 1;
9382                 }
9383                 if (all_backpointers_checked(rec, 1)) {
9384                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9385                                 (unsigned long long)rec->start,
9386                                 (unsigned long long)rec->nr);
9387                         fix = 1;
9388                         cur_err = 1;
9389                 }
9390                 if (!rec->owner_ref_checked) {
9391                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9392                                 (unsigned long long)rec->start,
9393                                 (unsigned long long)rec->nr);
9394                         fix = 1;
9395                         cur_err = 1;
9396                 }
9397
9398                 if (repair && fix) {
9399                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9400                         if (ret)
9401                                 goto repair_abort;
9402                 }
9403
9404
9405                 if (rec->bad_full_backref) {
9406                         fprintf(stderr, "bad full backref, on [%llu]\n",
9407                                 (unsigned long long)rec->start);
9408                         if (repair) {
9409                                 ret = fixup_extent_flags(root->fs_info, rec);
9410                                 if (ret)
9411                                         goto repair_abort;
9412                                 fix = 1;
9413                         }
9414                         cur_err = 1;
9415                 }
9416                 /*
9417                  * Although it's not a extent ref's problem, we reuse this
9418                  * routine for error reporting.
9419                  * No repair function yet.
9420                  */
9421                 if (rec->crossing_stripes) {
9422                         fprintf(stderr,
9423                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9424                                 rec->start, rec->start + rec->max_size);
9425                         cur_err = 1;
9426                 }
9427
9428                 if (rec->wrong_chunk_type) {
9429                         fprintf(stderr,
9430                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9431                                 rec->start, rec->start + rec->max_size);
9432                         cur_err = 1;
9433                 }
9434
9435                 remove_cache_extent(extent_cache, cache);
9436                 free_all_extent_backrefs(rec);
9437                 if (!init_extent_tree && repair && (!cur_err || fix))
9438                         clear_extent_dirty(root->fs_info->excluded_extents,
9439                                            rec->start,
9440                                            rec->start + rec->max_size - 1);
9441                 free(rec);
9442         }
9443 repair_abort:
9444         if (repair) {
9445                 if (ret && ret != -EAGAIN) {
9446                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9447                         exit(1);
9448                 } else if (!ret) {
9449                         struct btrfs_trans_handle *trans;
9450
9451                         root = root->fs_info->extent_root;
9452                         trans = btrfs_start_transaction(root, 1);
9453                         if (IS_ERR(trans)) {
9454                                 ret = PTR_ERR(trans);
9455                                 goto repair_abort;
9456                         }
9457
9458                         btrfs_fix_block_accounting(trans, root);
9459                         ret = btrfs_commit_transaction(trans, root);
9460                         if (ret)
9461                                 goto repair_abort;
9462                 }
9463                 return ret;
9464         }
9465         return 0;
9466 }
9467
9468 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9469 {
9470         u64 stripe_size;
9471
9472         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9473                 stripe_size = length;
9474                 stripe_size /= num_stripes;
9475         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9476                 stripe_size = length * 2;
9477                 stripe_size /= num_stripes;
9478         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9479                 stripe_size = length;
9480                 stripe_size /= (num_stripes - 1);
9481         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9482                 stripe_size = length;
9483                 stripe_size /= (num_stripes - 2);
9484         } else {
9485                 stripe_size = length;
9486         }
9487         return stripe_size;
9488 }
9489
9490 /*
9491  * Check the chunk with its block group/dev list ref:
9492  * Return 0 if all refs seems valid.
9493  * Return 1 if part of refs seems valid, need later check for rebuild ref
9494  * like missing block group and needs to search extent tree to rebuild them.
9495  * Return -1 if essential refs are missing and unable to rebuild.
9496  */
9497 static int check_chunk_refs(struct chunk_record *chunk_rec,
9498                             struct block_group_tree *block_group_cache,
9499                             struct device_extent_tree *dev_extent_cache,
9500                             int silent)
9501 {
9502         struct cache_extent *block_group_item;
9503         struct block_group_record *block_group_rec;
9504         struct cache_extent *dev_extent_item;
9505         struct device_extent_record *dev_extent_rec;
9506         u64 devid;
9507         u64 offset;
9508         u64 length;
9509         int metadump_v2 = 0;
9510         int i;
9511         int ret = 0;
9512
9513         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9514                                                chunk_rec->offset,
9515                                                chunk_rec->length);
9516         if (block_group_item) {
9517                 block_group_rec = container_of(block_group_item,
9518                                                struct block_group_record,
9519                                                cache);
9520                 if (chunk_rec->length != block_group_rec->offset ||
9521                     chunk_rec->offset != block_group_rec->objectid ||
9522                     (!metadump_v2 &&
9523                      chunk_rec->type_flags != block_group_rec->flags)) {
9524                         if (!silent)
9525                                 fprintf(stderr,
9526                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9527                                         chunk_rec->objectid,
9528                                         chunk_rec->type,
9529                                         chunk_rec->offset,
9530                                         chunk_rec->length,
9531                                         chunk_rec->offset,
9532                                         chunk_rec->type_flags,
9533                                         block_group_rec->objectid,
9534                                         block_group_rec->type,
9535                                         block_group_rec->offset,
9536                                         block_group_rec->offset,
9537                                         block_group_rec->objectid,
9538                                         block_group_rec->flags);
9539                         ret = -1;
9540                 } else {
9541                         list_del_init(&block_group_rec->list);
9542                         chunk_rec->bg_rec = block_group_rec;
9543                 }
9544         } else {
9545                 if (!silent)
9546                         fprintf(stderr,
9547                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9548                                 chunk_rec->objectid,
9549                                 chunk_rec->type,
9550                                 chunk_rec->offset,
9551                                 chunk_rec->length,
9552                                 chunk_rec->offset,
9553                                 chunk_rec->type_flags);
9554                 ret = 1;
9555         }
9556
9557         if (metadump_v2)
9558                 return ret;
9559
9560         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9561                                     chunk_rec->num_stripes);
9562         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9563                 devid = chunk_rec->stripes[i].devid;
9564                 offset = chunk_rec->stripes[i].offset;
9565                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9566                                                        devid, offset, length);
9567                 if (dev_extent_item) {
9568                         dev_extent_rec = container_of(dev_extent_item,
9569                                                 struct device_extent_record,
9570                                                 cache);
9571                         if (dev_extent_rec->objectid != devid ||
9572                             dev_extent_rec->offset != offset ||
9573                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9574                             dev_extent_rec->length != length) {
9575                                 if (!silent)
9576                                         fprintf(stderr,
9577                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9578                                                 chunk_rec->objectid,
9579                                                 chunk_rec->type,
9580                                                 chunk_rec->offset,
9581                                                 chunk_rec->stripes[i].devid,
9582                                                 chunk_rec->stripes[i].offset,
9583                                                 dev_extent_rec->objectid,
9584                                                 dev_extent_rec->offset,
9585                                                 dev_extent_rec->length);
9586                                 ret = -1;
9587                         } else {
9588                                 list_move(&dev_extent_rec->chunk_list,
9589                                           &chunk_rec->dextents);
9590                         }
9591                 } else {
9592                         if (!silent)
9593                                 fprintf(stderr,
9594                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9595                                         chunk_rec->objectid,
9596                                         chunk_rec->type,
9597                                         chunk_rec->offset,
9598                                         chunk_rec->stripes[i].devid,
9599                                         chunk_rec->stripes[i].offset);
9600                         ret = -1;
9601                 }
9602         }
9603         return ret;
9604 }
9605
9606 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9607 int check_chunks(struct cache_tree *chunk_cache,
9608                  struct block_group_tree *block_group_cache,
9609                  struct device_extent_tree *dev_extent_cache,
9610                  struct list_head *good, struct list_head *bad,
9611                  struct list_head *rebuild, int silent)
9612 {
9613         struct cache_extent *chunk_item;
9614         struct chunk_record *chunk_rec;
9615         struct block_group_record *bg_rec;
9616         struct device_extent_record *dext_rec;
9617         int err;
9618         int ret = 0;
9619
9620         chunk_item = first_cache_extent(chunk_cache);
9621         while (chunk_item) {
9622                 chunk_rec = container_of(chunk_item, struct chunk_record,
9623                                          cache);
9624                 err = check_chunk_refs(chunk_rec, block_group_cache,
9625                                        dev_extent_cache, silent);
9626                 if (err < 0)
9627                         ret = err;
9628                 if (err == 0 && good)
9629                         list_add_tail(&chunk_rec->list, good);
9630                 if (err > 0 && rebuild)
9631                         list_add_tail(&chunk_rec->list, rebuild);
9632                 if (err < 0 && bad)
9633                         list_add_tail(&chunk_rec->list, bad);
9634                 chunk_item = next_cache_extent(chunk_item);
9635         }
9636
9637         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9638                 if (!silent)
9639                         fprintf(stderr,
9640                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9641                                 bg_rec->objectid,
9642                                 bg_rec->offset,
9643                                 bg_rec->flags);
9644                 if (!ret)
9645                         ret = 1;
9646         }
9647
9648         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9649                             chunk_list) {
9650                 if (!silent)
9651                         fprintf(stderr,
9652                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9653                                 dext_rec->objectid,
9654                                 dext_rec->offset,
9655                                 dext_rec->length);
9656                 if (!ret)
9657                         ret = 1;
9658         }
9659         return ret;
9660 }
9661
9662
9663 static int check_device_used(struct device_record *dev_rec,
9664                              struct device_extent_tree *dext_cache)
9665 {
9666         struct cache_extent *cache;
9667         struct device_extent_record *dev_extent_rec;
9668         u64 total_byte = 0;
9669
9670         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9671         while (cache) {
9672                 dev_extent_rec = container_of(cache,
9673                                               struct device_extent_record,
9674                                               cache);
9675                 if (dev_extent_rec->objectid != dev_rec->devid)
9676                         break;
9677
9678                 list_del_init(&dev_extent_rec->device_list);
9679                 total_byte += dev_extent_rec->length;
9680                 cache = next_cache_extent(cache);
9681         }
9682
9683         if (total_byte != dev_rec->byte_used) {
9684                 fprintf(stderr,
9685                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9686                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9687                         dev_rec->type, dev_rec->offset);
9688                 return -1;
9689         } else {
9690                 return 0;
9691         }
9692 }
9693
9694 /* check btrfs_dev_item -> btrfs_dev_extent */
9695 static int check_devices(struct rb_root *dev_cache,
9696                          struct device_extent_tree *dev_extent_cache)
9697 {
9698         struct rb_node *dev_node;
9699         struct device_record *dev_rec;
9700         struct device_extent_record *dext_rec;
9701         int err;
9702         int ret = 0;
9703
9704         dev_node = rb_first(dev_cache);
9705         while (dev_node) {
9706                 dev_rec = container_of(dev_node, struct device_record, node);
9707                 err = check_device_used(dev_rec, dev_extent_cache);
9708                 if (err)
9709                         ret = err;
9710
9711                 dev_node = rb_next(dev_node);
9712         }
9713         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9714                             device_list) {
9715                 fprintf(stderr,
9716                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9717                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9718                 if (!ret)
9719                         ret = 1;
9720         }
9721         return ret;
9722 }
9723
9724 static int add_root_item_to_list(struct list_head *head,
9725                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9726                                   u8 level, u8 drop_level,
9727                                   int level_size, struct btrfs_key *drop_key)
9728 {
9729
9730         struct root_item_record *ri_rec;
9731         ri_rec = malloc(sizeof(*ri_rec));
9732         if (!ri_rec)
9733                 return -ENOMEM;
9734         ri_rec->bytenr = bytenr;
9735         ri_rec->objectid = objectid;
9736         ri_rec->level = level;
9737         ri_rec->level_size = level_size;
9738         ri_rec->drop_level = drop_level;
9739         ri_rec->last_snapshot = last_snapshot;
9740         if (drop_key)
9741                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9742         list_add_tail(&ri_rec->list, head);
9743
9744         return 0;
9745 }
9746
9747 static void free_root_item_list(struct list_head *list)
9748 {
9749         struct root_item_record *ri_rec;
9750
9751         while (!list_empty(list)) {
9752                 ri_rec = list_first_entry(list, struct root_item_record,
9753                                           list);
9754                 list_del_init(&ri_rec->list);
9755                 free(ri_rec);
9756         }
9757 }
9758
9759 static int deal_root_from_list(struct list_head *list,
9760                                struct btrfs_root *root,
9761                                struct block_info *bits,
9762                                int bits_nr,
9763                                struct cache_tree *pending,
9764                                struct cache_tree *seen,
9765                                struct cache_tree *reada,
9766                                struct cache_tree *nodes,
9767                                struct cache_tree *extent_cache,
9768                                struct cache_tree *chunk_cache,
9769                                struct rb_root *dev_cache,
9770                                struct block_group_tree *block_group_cache,
9771                                struct device_extent_tree *dev_extent_cache)
9772 {
9773         int ret = 0;
9774         u64 last;
9775
9776         while (!list_empty(list)) {
9777                 struct root_item_record *rec;
9778                 struct extent_buffer *buf;
9779                 rec = list_entry(list->next,
9780                                  struct root_item_record, list);
9781                 last = 0;
9782                 buf = read_tree_block(root->fs_info,
9783                                       rec->bytenr, rec->level_size, 0);
9784                 if (!extent_buffer_uptodate(buf)) {
9785                         free_extent_buffer(buf);
9786                         ret = -EIO;
9787                         break;
9788                 }
9789                 ret = add_root_to_pending(buf, extent_cache, pending,
9790                                     seen, nodes, rec->objectid);
9791                 if (ret < 0)
9792                         break;
9793                 /*
9794                  * To rebuild extent tree, we need deal with snapshot
9795                  * one by one, otherwise we deal with node firstly which
9796                  * can maximize readahead.
9797                  */
9798                 while (1) {
9799                         ret = run_next_block(root, bits, bits_nr, &last,
9800                                              pending, seen, reada, nodes,
9801                                              extent_cache, chunk_cache,
9802                                              dev_cache, block_group_cache,
9803                                              dev_extent_cache, rec);
9804                         if (ret != 0)
9805                                 break;
9806                 }
9807                 free_extent_buffer(buf);
9808                 list_del(&rec->list);
9809                 free(rec);
9810                 if (ret < 0)
9811                         break;
9812         }
9813         while (ret >= 0) {
9814                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9815                                      reada, nodes, extent_cache, chunk_cache,
9816                                      dev_cache, block_group_cache,
9817                                      dev_extent_cache, NULL);
9818                 if (ret != 0) {
9819                         if (ret > 0)
9820                                 ret = 0;
9821                         break;
9822                 }
9823         }
9824         return ret;
9825 }
9826
9827 static int check_chunks_and_extents(struct btrfs_root *root)
9828 {
9829         struct rb_root dev_cache;
9830         struct cache_tree chunk_cache;
9831         struct block_group_tree block_group_cache;
9832         struct device_extent_tree dev_extent_cache;
9833         struct cache_tree extent_cache;
9834         struct cache_tree seen;
9835         struct cache_tree pending;
9836         struct cache_tree reada;
9837         struct cache_tree nodes;
9838         struct extent_io_tree excluded_extents;
9839         struct cache_tree corrupt_blocks;
9840         struct btrfs_path path;
9841         struct btrfs_key key;
9842         struct btrfs_key found_key;
9843         int ret, err = 0;
9844         struct block_info *bits;
9845         int bits_nr;
9846         struct extent_buffer *leaf;
9847         int slot;
9848         struct btrfs_root_item ri;
9849         struct list_head dropping_trees;
9850         struct list_head normal_trees;
9851         struct btrfs_root *root1;
9852         u64 objectid;
9853         u32 level_size;
9854         u8 level;
9855
9856         dev_cache = RB_ROOT;
9857         cache_tree_init(&chunk_cache);
9858         block_group_tree_init(&block_group_cache);
9859         device_extent_tree_init(&dev_extent_cache);
9860
9861         cache_tree_init(&extent_cache);
9862         cache_tree_init(&seen);
9863         cache_tree_init(&pending);
9864         cache_tree_init(&nodes);
9865         cache_tree_init(&reada);
9866         cache_tree_init(&corrupt_blocks);
9867         extent_io_tree_init(&excluded_extents);
9868         INIT_LIST_HEAD(&dropping_trees);
9869         INIT_LIST_HEAD(&normal_trees);
9870
9871         if (repair) {
9872                 root->fs_info->excluded_extents = &excluded_extents;
9873                 root->fs_info->fsck_extent_cache = &extent_cache;
9874                 root->fs_info->free_extent_hook = free_extent_hook;
9875                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9876         }
9877
9878         bits_nr = 1024;
9879         bits = malloc(bits_nr * sizeof(struct block_info));
9880         if (!bits) {
9881                 perror("malloc");
9882                 exit(1);
9883         }
9884
9885         if (ctx.progress_enabled) {
9886                 ctx.tp = TASK_EXTENTS;
9887                 task_start(ctx.info);
9888         }
9889
9890 again:
9891         root1 = root->fs_info->tree_root;
9892         level = btrfs_header_level(root1->node);
9893         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9894                                     root1->node->start, 0, level, 0,
9895                                     root1->fs_info->nodesize, NULL);
9896         if (ret < 0)
9897                 goto out;
9898         root1 = root->fs_info->chunk_root;
9899         level = btrfs_header_level(root1->node);
9900         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9901                                     root1->node->start, 0, level, 0,
9902                                     root1->fs_info->nodesize, NULL);
9903         if (ret < 0)
9904                 goto out;
9905         btrfs_init_path(&path);
9906         key.offset = 0;
9907         key.objectid = 0;
9908         key.type = BTRFS_ROOT_ITEM_KEY;
9909         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9910                                         &key, &path, 0, 0);
9911         if (ret < 0)
9912                 goto out;
9913         while(1) {
9914                 leaf = path.nodes[0];
9915                 slot = path.slots[0];
9916                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9917                         ret = btrfs_next_leaf(root, &path);
9918                         if (ret != 0)
9919                                 break;
9920                         leaf = path.nodes[0];
9921                         slot = path.slots[0];
9922                 }
9923                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9924                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9925                         unsigned long offset;
9926                         u64 last_snapshot;
9927
9928                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9929                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9930                         last_snapshot = btrfs_root_last_snapshot(&ri);
9931                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9932                                 level = btrfs_root_level(&ri);
9933                                 level_size = root->fs_info->nodesize;
9934                                 ret = add_root_item_to_list(&normal_trees,
9935                                                 found_key.objectid,
9936                                                 btrfs_root_bytenr(&ri),
9937                                                 last_snapshot, level,
9938                                                 0, level_size, NULL);
9939                                 if (ret < 0)
9940                                         goto out;
9941                         } else {
9942                                 level = btrfs_root_level(&ri);
9943                                 level_size = root->fs_info->nodesize;
9944                                 objectid = found_key.objectid;
9945                                 btrfs_disk_key_to_cpu(&found_key,
9946                                                       &ri.drop_progress);
9947                                 ret = add_root_item_to_list(&dropping_trees,
9948                                                 objectid,
9949                                                 btrfs_root_bytenr(&ri),
9950                                                 last_snapshot, level,
9951                                                 ri.drop_level,
9952                                                 level_size, &found_key);
9953                                 if (ret < 0)
9954                                         goto out;
9955                         }
9956                 }
9957                 path.slots[0]++;
9958         }
9959         btrfs_release_path(&path);
9960
9961         /*
9962          * check_block can return -EAGAIN if it fixes something, please keep
9963          * this in mind when dealing with return values from these functions, if
9964          * we get -EAGAIN we want to fall through and restart the loop.
9965          */
9966         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9967                                   &seen, &reada, &nodes, &extent_cache,
9968                                   &chunk_cache, &dev_cache, &block_group_cache,
9969                                   &dev_extent_cache);
9970         if (ret < 0) {
9971                 if (ret == -EAGAIN)
9972                         goto loop;
9973                 goto out;
9974         }
9975         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9976                                   &pending, &seen, &reada, &nodes,
9977                                   &extent_cache, &chunk_cache, &dev_cache,
9978                                   &block_group_cache, &dev_extent_cache);
9979         if (ret < 0) {
9980                 if (ret == -EAGAIN)
9981                         goto loop;
9982                 goto out;
9983         }
9984
9985         ret = check_chunks(&chunk_cache, &block_group_cache,
9986                            &dev_extent_cache, NULL, NULL, NULL, 0);
9987         if (ret) {
9988                 if (ret == -EAGAIN)
9989                         goto loop;
9990                 err = ret;
9991         }
9992
9993         ret = check_extent_refs(root, &extent_cache);
9994         if (ret < 0) {
9995                 if (ret == -EAGAIN)
9996                         goto loop;
9997                 goto out;
9998         }
9999
10000         ret = check_devices(&dev_cache, &dev_extent_cache);
10001         if (ret && err)
10002                 ret = err;
10003
10004 out:
10005         task_stop(ctx.info);
10006         if (repair) {
10007                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10008                 extent_io_tree_cleanup(&excluded_extents);
10009                 root->fs_info->fsck_extent_cache = NULL;
10010                 root->fs_info->free_extent_hook = NULL;
10011                 root->fs_info->corrupt_blocks = NULL;
10012                 root->fs_info->excluded_extents = NULL;
10013         }
10014         free(bits);
10015         free_chunk_cache_tree(&chunk_cache);
10016         free_device_cache_tree(&dev_cache);
10017         free_block_group_tree(&block_group_cache);
10018         free_device_extent_tree(&dev_extent_cache);
10019         free_extent_cache_tree(&seen);
10020         free_extent_cache_tree(&pending);
10021         free_extent_cache_tree(&reada);
10022         free_extent_cache_tree(&nodes);
10023         free_root_item_list(&normal_trees);
10024         free_root_item_list(&dropping_trees);
10025         return ret;
10026 loop:
10027         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10028         free_extent_cache_tree(&seen);
10029         free_extent_cache_tree(&pending);
10030         free_extent_cache_tree(&reada);
10031         free_extent_cache_tree(&nodes);
10032         free_chunk_cache_tree(&chunk_cache);
10033         free_block_group_tree(&block_group_cache);
10034         free_device_cache_tree(&dev_cache);
10035         free_device_extent_tree(&dev_extent_cache);
10036         free_extent_record_cache(&extent_cache);
10037         free_root_item_list(&normal_trees);
10038         free_root_item_list(&dropping_trees);
10039         extent_io_tree_cleanup(&excluded_extents);
10040         goto again;
10041 }
10042
10043 /*
10044  * Check backrefs of a tree block given by @bytenr or @eb.
10045  *
10046  * @root:       the root containing the @bytenr or @eb
10047  * @eb:         tree block extent buffer, can be NULL
10048  * @bytenr:     bytenr of the tree block to search
10049  * @level:      tree level of the tree block
10050  * @owner:      owner of the tree block
10051  *
10052  * Return >0 for any error found and output error message
10053  * Return 0 for no error found
10054  */
10055 static int check_tree_block_ref(struct btrfs_root *root,
10056                                 struct extent_buffer *eb, u64 bytenr,
10057                                 int level, u64 owner)
10058 {
10059         struct btrfs_key key;
10060         struct btrfs_root *extent_root = root->fs_info->extent_root;
10061         struct btrfs_path path;
10062         struct btrfs_extent_item *ei;
10063         struct btrfs_extent_inline_ref *iref;
10064         struct extent_buffer *leaf;
10065         unsigned long end;
10066         unsigned long ptr;
10067         int slot;
10068         int skinny_level;
10069         int type;
10070         u32 nodesize = root->fs_info->nodesize;
10071         u32 item_size;
10072         u64 offset;
10073         int tree_reloc_root = 0;
10074         int found_ref = 0;
10075         int err = 0;
10076         int ret;
10077
10078         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10079             btrfs_header_bytenr(root->node) == bytenr)
10080                 tree_reloc_root = 1;
10081
10082         btrfs_init_path(&path);
10083         key.objectid = bytenr;
10084         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10085                 key.type = BTRFS_METADATA_ITEM_KEY;
10086         else
10087                 key.type = BTRFS_EXTENT_ITEM_KEY;
10088         key.offset = (u64)-1;
10089
10090         /* Search for the backref in extent tree */
10091         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10092         if (ret < 0) {
10093                 err |= BACKREF_MISSING;
10094                 goto out;
10095         }
10096         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10097         if (ret) {
10098                 err |= BACKREF_MISSING;
10099                 goto out;
10100         }
10101
10102         leaf = path.nodes[0];
10103         slot = path.slots[0];
10104         btrfs_item_key_to_cpu(leaf, &key, slot);
10105
10106         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10107
10108         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10109                 skinny_level = (int)key.offset;
10110                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10111         } else {
10112                 struct btrfs_tree_block_info *info;
10113
10114                 info = (struct btrfs_tree_block_info *)(ei + 1);
10115                 skinny_level = btrfs_tree_block_level(leaf, info);
10116                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10117         }
10118
10119         if (eb) {
10120                 u64 header_gen;
10121                 u64 extent_gen;
10122
10123                 if (!(btrfs_extent_flags(leaf, ei) &
10124                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10125                         error(
10126                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10127                                 key.objectid, nodesize,
10128                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10129                         err = BACKREF_MISMATCH;
10130                 }
10131                 header_gen = btrfs_header_generation(eb);
10132                 extent_gen = btrfs_extent_generation(leaf, ei);
10133                 if (header_gen != extent_gen) {
10134                         error(
10135         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10136                                 key.objectid, nodesize, header_gen,
10137                                 extent_gen);
10138                         err = BACKREF_MISMATCH;
10139                 }
10140                 if (level != skinny_level) {
10141                         error(
10142                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10143                                 key.objectid, nodesize, level, skinny_level);
10144                         err = BACKREF_MISMATCH;
10145                 }
10146                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10147                         error(
10148                         "extent[%llu %u] is referred by other roots than %llu",
10149                                 key.objectid, nodesize, root->objectid);
10150                         err = BACKREF_MISMATCH;
10151                 }
10152         }
10153
10154         /*
10155          * Iterate the extent/metadata item to find the exact backref
10156          */
10157         item_size = btrfs_item_size_nr(leaf, slot);
10158         ptr = (unsigned long)iref;
10159         end = (unsigned long)ei + item_size;
10160         while (ptr < end) {
10161                 iref = (struct btrfs_extent_inline_ref *)ptr;
10162                 type = btrfs_extent_inline_ref_type(leaf, iref);
10163                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10164
10165                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10166                         (offset == root->objectid || offset == owner)) {
10167                         found_ref = 1;
10168                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10169                         /*
10170                          * Backref of tree reloc root points to itself, no need
10171                          * to check backref any more.
10172                          */
10173                         if (tree_reloc_root)
10174                                 found_ref = 1;
10175                         else
10176                         /* Check if the backref points to valid referencer */
10177                                 found_ref = !check_tree_block_ref(root, NULL,
10178                                                 offset, level + 1, owner);
10179                 }
10180
10181                 if (found_ref)
10182                         break;
10183                 ptr += btrfs_extent_inline_ref_size(type);
10184         }
10185
10186         /*
10187          * Inlined extent item doesn't have what we need, check
10188          * TREE_BLOCK_REF_KEY
10189          */
10190         if (!found_ref) {
10191                 btrfs_release_path(&path);
10192                 key.objectid = bytenr;
10193                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10194                 key.offset = root->objectid;
10195
10196                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10197                 if (!ret)
10198                         found_ref = 1;
10199         }
10200         if (!found_ref)
10201                 err |= BACKREF_MISSING;
10202 out:
10203         btrfs_release_path(&path);
10204         if (eb && (err & BACKREF_MISSING))
10205                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10206                         bytenr, nodesize, owner, level);
10207         return err;
10208 }
10209
10210 /*
10211  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10212  *
10213  * Return >0 any error found and output error message
10214  * Return 0 for no error found
10215  */
10216 static int check_extent_data_item(struct btrfs_root *root,
10217                                   struct extent_buffer *eb, int slot)
10218 {
10219         struct btrfs_file_extent_item *fi;
10220         struct btrfs_path path;
10221         struct btrfs_root *extent_root = root->fs_info->extent_root;
10222         struct btrfs_key fi_key;
10223         struct btrfs_key dbref_key;
10224         struct extent_buffer *leaf;
10225         struct btrfs_extent_item *ei;
10226         struct btrfs_extent_inline_ref *iref;
10227         struct btrfs_extent_data_ref *dref;
10228         u64 owner;
10229         u64 disk_bytenr;
10230         u64 disk_num_bytes;
10231         u64 extent_num_bytes;
10232         u64 extent_flags;
10233         u32 item_size;
10234         unsigned long end;
10235         unsigned long ptr;
10236         int type;
10237         u64 ref_root;
10238         int found_dbackref = 0;
10239         int err = 0;
10240         int ret;
10241
10242         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10243         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10244
10245         /* Nothing to check for hole and inline data extents */
10246         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10247             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10248                 return 0;
10249
10250         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10251         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10252         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10253
10254         /* Check unaligned disk_num_bytes and num_bytes */
10255         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10256                 error(
10257 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10258                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10259                         root->fs_info->sectorsize);
10260                 err |= BYTES_UNALIGNED;
10261         } else {
10262                 data_bytes_allocated += disk_num_bytes;
10263         }
10264         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10265                 error(
10266 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10267                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10268                         root->fs_info->sectorsize);
10269                 err |= BYTES_UNALIGNED;
10270         } else {
10271                 data_bytes_referenced += extent_num_bytes;
10272         }
10273         owner = btrfs_header_owner(eb);
10274
10275         /* Check the extent item of the file extent in extent tree */
10276         btrfs_init_path(&path);
10277         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10278         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10279         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10280
10281         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10282         if (ret)
10283                 goto out;
10284
10285         leaf = path.nodes[0];
10286         slot = path.slots[0];
10287         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10288
10289         extent_flags = btrfs_extent_flags(leaf, ei);
10290
10291         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10292                 error(
10293                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10294                     disk_bytenr, disk_num_bytes,
10295                     BTRFS_EXTENT_FLAG_DATA);
10296                 err |= BACKREF_MISMATCH;
10297         }
10298
10299         /* Check data backref inside that extent item */
10300         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10301         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10302         ptr = (unsigned long)iref;
10303         end = (unsigned long)ei + item_size;
10304         while (ptr < end) {
10305                 iref = (struct btrfs_extent_inline_ref *)ptr;
10306                 type = btrfs_extent_inline_ref_type(leaf, iref);
10307                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10308
10309                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10310                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10311                         if (ref_root == owner || ref_root == root->objectid)
10312                                 found_dbackref = 1;
10313                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10314                         found_dbackref = !check_tree_block_ref(root, NULL,
10315                                 btrfs_extent_inline_ref_offset(leaf, iref),
10316                                 0, owner);
10317                 }
10318
10319                 if (found_dbackref)
10320                         break;
10321                 ptr += btrfs_extent_inline_ref_size(type);
10322         }
10323
10324         if (!found_dbackref) {
10325                 btrfs_release_path(&path);
10326
10327                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10328                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10329                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10330                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10331                                 fi_key.objectid, fi_key.offset);
10332
10333                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10334                                         &dbref_key, &path, 0, 0);
10335                 if (!ret) {
10336                         found_dbackref = 1;
10337                         goto out;
10338                 }
10339
10340                 btrfs_release_path(&path);
10341
10342                 /*
10343                  * Neither inlined nor EXTENT_DATA_REF found, try
10344                  * SHARED_DATA_REF as last chance.
10345                  */
10346                 dbref_key.objectid = disk_bytenr;
10347                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10348                 dbref_key.offset = eb->start;
10349
10350                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10351                                         &dbref_key, &path, 0, 0);
10352                 if (!ret) {
10353                         found_dbackref = 1;
10354                         goto out;
10355                 }
10356         }
10357
10358 out:
10359         if (!found_dbackref)
10360                 err |= BACKREF_MISSING;
10361         btrfs_release_path(&path);
10362         if (err & BACKREF_MISSING) {
10363                 error("data extent[%llu %llu] backref lost",
10364                       disk_bytenr, disk_num_bytes);
10365         }
10366         return err;
10367 }
10368
10369 /*
10370  * Get real tree block level for the case like shared block
10371  * Return >= 0 as tree level
10372  * Return <0 for error
10373  */
10374 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10375 {
10376         struct extent_buffer *eb;
10377         struct btrfs_path path;
10378         struct btrfs_key key;
10379         struct btrfs_extent_item *ei;
10380         u64 flags;
10381         u64 transid;
10382         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10383         u8 backref_level;
10384         u8 header_level;
10385         int ret;
10386
10387         /* Search extent tree for extent generation and level */
10388         key.objectid = bytenr;
10389         key.type = BTRFS_METADATA_ITEM_KEY;
10390         key.offset = (u64)-1;
10391
10392         btrfs_init_path(&path);
10393         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10394         if (ret < 0)
10395                 goto release_out;
10396         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10397         if (ret < 0)
10398                 goto release_out;
10399         if (ret > 0) {
10400                 ret = -ENOENT;
10401                 goto release_out;
10402         }
10403
10404         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10405         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10406                             struct btrfs_extent_item);
10407         flags = btrfs_extent_flags(path.nodes[0], ei);
10408         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10409                 ret = -ENOENT;
10410                 goto release_out;
10411         }
10412
10413         /* Get transid for later read_tree_block() check */
10414         transid = btrfs_extent_generation(path.nodes[0], ei);
10415
10416         /* Get backref level as one source */
10417         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10418                 backref_level = key.offset;
10419         } else {
10420                 struct btrfs_tree_block_info *info;
10421
10422                 info = (struct btrfs_tree_block_info *)(ei + 1);
10423                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10424         }
10425         btrfs_release_path(&path);
10426
10427         /* Get level from tree block as an alternative source */
10428         eb = read_tree_block(fs_info, bytenr, nodesize, transid);
10429         if (!extent_buffer_uptodate(eb)) {
10430                 free_extent_buffer(eb);
10431                 return -EIO;
10432         }
10433         header_level = btrfs_header_level(eb);
10434         free_extent_buffer(eb);
10435
10436         if (header_level != backref_level)
10437                 return -EIO;
10438         return header_level;
10439
10440 release_out:
10441         btrfs_release_path(&path);
10442         return ret;
10443 }
10444
10445 /*
10446  * Check if a tree block backref is valid (points to a valid tree block)
10447  * if level == -1, level will be resolved
10448  * Return >0 for any error found and print error message
10449  */
10450 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10451                                     u64 bytenr, int level)
10452 {
10453         struct btrfs_root *root;
10454         struct btrfs_key key;
10455         struct btrfs_path path;
10456         struct extent_buffer *eb;
10457         struct extent_buffer *node;
10458         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10459         int err = 0;
10460         int ret;
10461
10462         /* Query level for level == -1 special case */
10463         if (level == -1)
10464                 level = query_tree_block_level(fs_info, bytenr);
10465         if (level < 0) {
10466                 err |= REFERENCER_MISSING;
10467                 goto out;
10468         }
10469
10470         key.objectid = root_id;
10471         key.type = BTRFS_ROOT_ITEM_KEY;
10472         key.offset = (u64)-1;
10473
10474         root = btrfs_read_fs_root(fs_info, &key);
10475         if (IS_ERR(root)) {
10476                 err |= REFERENCER_MISSING;
10477                 goto out;
10478         }
10479
10480         /* Read out the tree block to get item/node key */
10481         eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10482         if (!extent_buffer_uptodate(eb)) {
10483                 err |= REFERENCER_MISSING;
10484                 free_extent_buffer(eb);
10485                 goto out;
10486         }
10487
10488         /* Empty tree, no need to check key */
10489         if (!btrfs_header_nritems(eb) && !level) {
10490                 free_extent_buffer(eb);
10491                 goto out;
10492         }
10493
10494         if (level)
10495                 btrfs_node_key_to_cpu(eb, &key, 0);
10496         else
10497                 btrfs_item_key_to_cpu(eb, &key, 0);
10498
10499         free_extent_buffer(eb);
10500
10501         btrfs_init_path(&path);
10502         path.lowest_level = level;
10503         /* Search with the first key, to ensure we can reach it */
10504         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10505         if (ret < 0) {
10506                 err |= REFERENCER_MISSING;
10507                 goto release_out;
10508         }
10509
10510         node = path.nodes[level];
10511         if (btrfs_header_bytenr(node) != bytenr) {
10512                 error(
10513         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10514                         bytenr, nodesize, bytenr,
10515                         btrfs_header_bytenr(node));
10516                 err |= REFERENCER_MISMATCH;
10517         }
10518         if (btrfs_header_level(node) != level) {
10519                 error(
10520         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10521                         bytenr, nodesize, level,
10522                         btrfs_header_level(node));
10523                 err |= REFERENCER_MISMATCH;
10524         }
10525
10526 release_out:
10527         btrfs_release_path(&path);
10528 out:
10529         if (err & REFERENCER_MISSING) {
10530                 if (level < 0)
10531                         error("extent [%llu %d] lost referencer (owner: %llu)",
10532                                 bytenr, nodesize, root_id);
10533                 else
10534                         error(
10535                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10536                                 bytenr, nodesize, root_id, level);
10537         }
10538
10539         return err;
10540 }
10541
10542 /*
10543  * Check if tree block @eb is tree reloc root.
10544  * Return 0 if it's not or any problem happens
10545  * Return 1 if it's a tree reloc root
10546  */
10547 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10548                                  struct extent_buffer *eb)
10549 {
10550         struct btrfs_root *tree_reloc_root;
10551         struct btrfs_key key;
10552         u64 bytenr = btrfs_header_bytenr(eb);
10553         u64 owner = btrfs_header_owner(eb);
10554         int ret = 0;
10555
10556         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10557         key.offset = owner;
10558         key.type = BTRFS_ROOT_ITEM_KEY;
10559
10560         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10561         if (IS_ERR(tree_reloc_root))
10562                 return 0;
10563
10564         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10565                 ret = 1;
10566         btrfs_free_fs_root(tree_reloc_root);
10567         return ret;
10568 }
10569
10570 /*
10571  * Check referencer for shared block backref
10572  * If level == -1, this function will resolve the level.
10573  */
10574 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10575                                      u64 parent, u64 bytenr, int level)
10576 {
10577         struct extent_buffer *eb;
10578         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10579         u32 nr;
10580         int found_parent = 0;
10581         int i;
10582
10583         eb = read_tree_block(fs_info, parent, nodesize, 0);
10584         if (!extent_buffer_uptodate(eb))
10585                 goto out;
10586
10587         if (level == -1)
10588                 level = query_tree_block_level(fs_info, bytenr);
10589         if (level < 0)
10590                 goto out;
10591
10592         /* It's possible it's a tree reloc root */
10593         if (parent == bytenr) {
10594                 if (is_tree_reloc_root(fs_info, eb))
10595                         found_parent = 1;
10596                 goto out;
10597         }
10598
10599         if (level + 1 != btrfs_header_level(eb))
10600                 goto out;
10601
10602         nr = btrfs_header_nritems(eb);
10603         for (i = 0; i < nr; i++) {
10604                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10605                         found_parent = 1;
10606                         break;
10607                 }
10608         }
10609 out:
10610         free_extent_buffer(eb);
10611         if (!found_parent) {
10612                 error(
10613         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10614                         bytenr, nodesize, parent, level);
10615                 return REFERENCER_MISSING;
10616         }
10617         return 0;
10618 }
10619
10620 /*
10621  * Check referencer for normal (inlined) data ref
10622  * If len == 0, it will be resolved by searching in extent tree
10623  */
10624 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10625                                      u64 root_id, u64 objectid, u64 offset,
10626                                      u64 bytenr, u64 len, u32 count)
10627 {
10628         struct btrfs_root *root;
10629         struct btrfs_root *extent_root = fs_info->extent_root;
10630         struct btrfs_key key;
10631         struct btrfs_path path;
10632         struct extent_buffer *leaf;
10633         struct btrfs_file_extent_item *fi;
10634         u32 found_count = 0;
10635         int slot;
10636         int ret = 0;
10637
10638         if (!len) {
10639                 key.objectid = bytenr;
10640                 key.type = BTRFS_EXTENT_ITEM_KEY;
10641                 key.offset = (u64)-1;
10642
10643                 btrfs_init_path(&path);
10644                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10645                 if (ret < 0)
10646                         goto out;
10647                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10648                 if (ret)
10649                         goto out;
10650                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10651                 if (key.objectid != bytenr ||
10652                     key.type != BTRFS_EXTENT_ITEM_KEY)
10653                         goto out;
10654                 len = key.offset;
10655                 btrfs_release_path(&path);
10656         }
10657         key.objectid = root_id;
10658         key.type = BTRFS_ROOT_ITEM_KEY;
10659         key.offset = (u64)-1;
10660         btrfs_init_path(&path);
10661
10662         root = btrfs_read_fs_root(fs_info, &key);
10663         if (IS_ERR(root))
10664                 goto out;
10665
10666         key.objectid = objectid;
10667         key.type = BTRFS_EXTENT_DATA_KEY;
10668         /*
10669          * It can be nasty as data backref offset is
10670          * file offset - file extent offset, which is smaller or
10671          * equal to original backref offset.  The only special case is
10672          * overflow.  So we need to special check and do further search.
10673          */
10674         key.offset = offset & (1ULL << 63) ? 0 : offset;
10675
10676         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10677         if (ret < 0)
10678                 goto out;
10679
10680         /*
10681          * Search afterwards to get correct one
10682          * NOTE: As we must do a comprehensive check on the data backref to
10683          * make sure the dref count also matches, we must iterate all file
10684          * extents for that inode.
10685          */
10686         while (1) {
10687                 leaf = path.nodes[0];
10688                 slot = path.slots[0];
10689
10690                 if (slot >= btrfs_header_nritems(leaf))
10691                         goto next;
10692                 btrfs_item_key_to_cpu(leaf, &key, slot);
10693                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10694                         break;
10695                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10696                 /*
10697                  * Except normal disk bytenr and disk num bytes, we still
10698                  * need to do extra check on dbackref offset as
10699                  * dbackref offset = file_offset - file_extent_offset
10700                  */
10701                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10702                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10703                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10704                     offset)
10705                         found_count++;
10706
10707 next:
10708                 ret = btrfs_next_item(root, &path);
10709                 if (ret)
10710                         break;
10711         }
10712 out:
10713         btrfs_release_path(&path);
10714         if (found_count != count) {
10715                 error(
10716 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10717                         bytenr, len, root_id, objectid, offset, count, found_count);
10718                 return REFERENCER_MISSING;
10719         }
10720         return 0;
10721 }
10722
10723 /*
10724  * Check if the referencer of a shared data backref exists
10725  */
10726 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10727                                      u64 parent, u64 bytenr)
10728 {
10729         struct extent_buffer *eb;
10730         struct btrfs_key key;
10731         struct btrfs_file_extent_item *fi;
10732         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10733         u32 nr;
10734         int found_parent = 0;
10735         int i;
10736
10737         eb = read_tree_block(fs_info, parent, nodesize, 0);
10738         if (!extent_buffer_uptodate(eb))
10739                 goto out;
10740
10741         nr = btrfs_header_nritems(eb);
10742         for (i = 0; i < nr; i++) {
10743                 btrfs_item_key_to_cpu(eb, &key, i);
10744                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10745                         continue;
10746
10747                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10748                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10749                         continue;
10750
10751                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10752                         found_parent = 1;
10753                         break;
10754                 }
10755         }
10756
10757 out:
10758         free_extent_buffer(eb);
10759         if (!found_parent) {
10760                 error("shared extent %llu referencer lost (parent: %llu)",
10761                         bytenr, parent);
10762                 return REFERENCER_MISSING;
10763         }
10764         return 0;
10765 }
10766
10767 /*
10768  * This function will check a given extent item, including its backref and
10769  * itself (like crossing stripe boundary and type)
10770  *
10771  * Since we don't use extent_record anymore, introduce new error bit
10772  */
10773 static int check_extent_item(struct btrfs_fs_info *fs_info,
10774                              struct extent_buffer *eb, int slot)
10775 {
10776         struct btrfs_extent_item *ei;
10777         struct btrfs_extent_inline_ref *iref;
10778         struct btrfs_extent_data_ref *dref;
10779         unsigned long end;
10780         unsigned long ptr;
10781         int type;
10782         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10783         u32 item_size = btrfs_item_size_nr(eb, slot);
10784         u64 flags;
10785         u64 offset;
10786         int metadata = 0;
10787         int level;
10788         struct btrfs_key key;
10789         int ret;
10790         int err = 0;
10791
10792         btrfs_item_key_to_cpu(eb, &key, slot);
10793         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10794                 bytes_used += key.offset;
10795         else
10796                 bytes_used += nodesize;
10797
10798         if (item_size < sizeof(*ei)) {
10799                 /*
10800                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10801                  * old thing when on disk format is still un-determined.
10802                  * No need to care about it anymore
10803                  */
10804                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10805                 return -ENOTTY;
10806         }
10807
10808         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10809         flags = btrfs_extent_flags(eb, ei);
10810
10811         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10812                 metadata = 1;
10813         if (metadata && check_crossing_stripes(global_info, key.objectid,
10814                                                eb->len)) {
10815                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10816                       key.objectid, key.objectid + nodesize);
10817                 err |= CROSSING_STRIPE_BOUNDARY;
10818         }
10819
10820         ptr = (unsigned long)(ei + 1);
10821
10822         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10823                 /* Old EXTENT_ITEM metadata */
10824                 struct btrfs_tree_block_info *info;
10825
10826                 info = (struct btrfs_tree_block_info *)ptr;
10827                 level = btrfs_tree_block_level(eb, info);
10828                 ptr += sizeof(struct btrfs_tree_block_info);
10829         } else {
10830                 /* New METADATA_ITEM */
10831                 level = key.offset;
10832         }
10833         end = (unsigned long)ei + item_size;
10834
10835 next:
10836         /* Reached extent item end normally */
10837         if (ptr == end)
10838                 goto out;
10839
10840         /* Beyond extent item end, wrong item size */
10841         if (ptr > end) {
10842                 err |= ITEM_SIZE_MISMATCH;
10843                 error("extent item at bytenr %llu slot %d has wrong size",
10844                         eb->start, slot);
10845                 goto out;
10846         }
10847
10848         /* Now check every backref in this extent item */
10849         iref = (struct btrfs_extent_inline_ref *)ptr;
10850         type = btrfs_extent_inline_ref_type(eb, iref);
10851         offset = btrfs_extent_inline_ref_offset(eb, iref);
10852         switch (type) {
10853         case BTRFS_TREE_BLOCK_REF_KEY:
10854                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10855                                                level);
10856                 err |= ret;
10857                 break;
10858         case BTRFS_SHARED_BLOCK_REF_KEY:
10859                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10860                                                  level);
10861                 err |= ret;
10862                 break;
10863         case BTRFS_EXTENT_DATA_REF_KEY:
10864                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10865                 ret = check_extent_data_backref(fs_info,
10866                                 btrfs_extent_data_ref_root(eb, dref),
10867                                 btrfs_extent_data_ref_objectid(eb, dref),
10868                                 btrfs_extent_data_ref_offset(eb, dref),
10869                                 key.objectid, key.offset,
10870                                 btrfs_extent_data_ref_count(eb, dref));
10871                 err |= ret;
10872                 break;
10873         case BTRFS_SHARED_DATA_REF_KEY:
10874                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10875                 err |= ret;
10876                 break;
10877         default:
10878                 error("extent[%llu %d %llu] has unknown ref type: %d",
10879                         key.objectid, key.type, key.offset, type);
10880                 err |= UNKNOWN_TYPE;
10881                 goto out;
10882         }
10883
10884         ptr += btrfs_extent_inline_ref_size(type);
10885         goto next;
10886
10887 out:
10888         return err;
10889 }
10890
10891 /*
10892  * Check if a dev extent item is referred correctly by its chunk
10893  */
10894 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10895                                  struct extent_buffer *eb, int slot)
10896 {
10897         struct btrfs_root *chunk_root = fs_info->chunk_root;
10898         struct btrfs_dev_extent *ptr;
10899         struct btrfs_path path;
10900         struct btrfs_key chunk_key;
10901         struct btrfs_key devext_key;
10902         struct btrfs_chunk *chunk;
10903         struct extent_buffer *l;
10904         int num_stripes;
10905         u64 length;
10906         int i;
10907         int found_chunk = 0;
10908         int ret;
10909
10910         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10911         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10912         length = btrfs_dev_extent_length(eb, ptr);
10913
10914         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10915         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10916         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10917
10918         btrfs_init_path(&path);
10919         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10920         if (ret)
10921                 goto out;
10922
10923         l = path.nodes[0];
10924         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10925         ret = btrfs_check_chunk_valid(chunk_root, l, chunk, path.slots[0],
10926                                       chunk_key.offset);
10927         if (ret < 0)
10928                 goto out;
10929
10930         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10931                 goto out;
10932
10933         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10934         for (i = 0; i < num_stripes; i++) {
10935                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10936                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10937
10938                 if (devid == devext_key.objectid &&
10939                     offset == devext_key.offset) {
10940                         found_chunk = 1;
10941                         break;
10942                 }
10943         }
10944 out:
10945         btrfs_release_path(&path);
10946         if (!found_chunk) {
10947                 error(
10948                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10949                         devext_key.objectid, devext_key.offset, length);
10950                 return REFERENCER_MISSING;
10951         }
10952         return 0;
10953 }
10954
10955 /*
10956  * Check if the used space is correct with the dev item
10957  */
10958 static int check_dev_item(struct btrfs_fs_info *fs_info,
10959                           struct extent_buffer *eb, int slot)
10960 {
10961         struct btrfs_root *dev_root = fs_info->dev_root;
10962         struct btrfs_dev_item *dev_item;
10963         struct btrfs_path path;
10964         struct btrfs_key key;
10965         struct btrfs_dev_extent *ptr;
10966         u64 dev_id;
10967         u64 used;
10968         u64 total = 0;
10969         int ret;
10970
10971         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10972         dev_id = btrfs_device_id(eb, dev_item);
10973         used = btrfs_device_bytes_used(eb, dev_item);
10974
10975         key.objectid = dev_id;
10976         key.type = BTRFS_DEV_EXTENT_KEY;
10977         key.offset = 0;
10978
10979         btrfs_init_path(&path);
10980         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10981         if (ret < 0) {
10982                 btrfs_item_key_to_cpu(eb, &key, slot);
10983                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10984                         key.objectid, key.type, key.offset);
10985                 btrfs_release_path(&path);
10986                 return REFERENCER_MISSING;
10987         }
10988
10989         /* Iterate dev_extents to calculate the used space of a device */
10990         while (1) {
10991                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10992                         goto next;
10993
10994                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10995                 if (key.objectid > dev_id)
10996                         break;
10997                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10998                         goto next;
10999
11000                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11001                                      struct btrfs_dev_extent);
11002                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11003 next:
11004                 ret = btrfs_next_item(dev_root, &path);
11005                 if (ret)
11006                         break;
11007         }
11008         btrfs_release_path(&path);
11009
11010         if (used != total) {
11011                 btrfs_item_key_to_cpu(eb, &key, slot);
11012                 error(
11013 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11014                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11015                         BTRFS_DEV_EXTENT_KEY, dev_id);
11016                 return ACCOUNTING_MISMATCH;
11017         }
11018         return 0;
11019 }
11020
11021 /*
11022  * Check a block group item with its referener (chunk) and its used space
11023  * with extent/metadata item
11024  */
11025 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11026                                   struct extent_buffer *eb, int slot)
11027 {
11028         struct btrfs_root *extent_root = fs_info->extent_root;
11029         struct btrfs_root *chunk_root = fs_info->chunk_root;
11030         struct btrfs_block_group_item *bi;
11031         struct btrfs_block_group_item bg_item;
11032         struct btrfs_path path;
11033         struct btrfs_key bg_key;
11034         struct btrfs_key chunk_key;
11035         struct btrfs_key extent_key;
11036         struct btrfs_chunk *chunk;
11037         struct extent_buffer *leaf;
11038         struct btrfs_extent_item *ei;
11039         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11040         u64 flags;
11041         u64 bg_flags;
11042         u64 used;
11043         u64 total = 0;
11044         int ret;
11045         int err = 0;
11046
11047         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11048         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11049         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11050         used = btrfs_block_group_used(&bg_item);
11051         bg_flags = btrfs_block_group_flags(&bg_item);
11052
11053         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11054         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11055         chunk_key.offset = bg_key.objectid;
11056
11057         btrfs_init_path(&path);
11058         /* Search for the referencer chunk */
11059         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11060         if (ret) {
11061                 error(
11062                 "block group[%llu %llu] did not find the related chunk item",
11063                         bg_key.objectid, bg_key.offset);
11064                 err |= REFERENCER_MISSING;
11065         } else {
11066                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11067                                         struct btrfs_chunk);
11068                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11069                                                 bg_key.offset) {
11070                         error(
11071         "block group[%llu %llu] related chunk item length does not match",
11072                                 bg_key.objectid, bg_key.offset);
11073                         err |= REFERENCER_MISMATCH;
11074                 }
11075         }
11076         btrfs_release_path(&path);
11077
11078         /* Search from the block group bytenr */
11079         extent_key.objectid = bg_key.objectid;
11080         extent_key.type = 0;
11081         extent_key.offset = 0;
11082
11083         btrfs_init_path(&path);
11084         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11085         if (ret < 0)
11086                 goto out;
11087
11088         /* Iterate extent tree to account used space */
11089         while (1) {
11090                 leaf = path.nodes[0];
11091
11092                 /* Search slot can point to the last item beyond leaf nritems */
11093                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11094                         goto next;
11095
11096                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11097                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11098                         break;
11099
11100                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11101                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11102                         goto next;
11103                 if (extent_key.objectid < bg_key.objectid)
11104                         goto next;
11105
11106                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11107                         total += nodesize;
11108                 else
11109                         total += extent_key.offset;
11110
11111                 ei = btrfs_item_ptr(leaf, path.slots[0],
11112                                     struct btrfs_extent_item);
11113                 flags = btrfs_extent_flags(leaf, ei);
11114                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11115                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11116                                 error(
11117                         "bad extent[%llu, %llu) type mismatch with chunk",
11118                                         extent_key.objectid,
11119                                         extent_key.objectid + extent_key.offset);
11120                                 err |= CHUNK_TYPE_MISMATCH;
11121                         }
11122                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11123                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11124                                     BTRFS_BLOCK_GROUP_METADATA))) {
11125                                 error(
11126                         "bad extent[%llu, %llu) type mismatch with chunk",
11127                                         extent_key.objectid,
11128                                         extent_key.objectid + nodesize);
11129                                 err |= CHUNK_TYPE_MISMATCH;
11130                         }
11131                 }
11132 next:
11133                 ret = btrfs_next_item(extent_root, &path);
11134                 if (ret)
11135                         break;
11136         }
11137
11138 out:
11139         btrfs_release_path(&path);
11140
11141         if (total != used) {
11142                 error(
11143                 "block group[%llu %llu] used %llu but extent items used %llu",
11144                         bg_key.objectid, bg_key.offset, used, total);
11145                 err |= ACCOUNTING_MISMATCH;
11146         }
11147         return err;
11148 }
11149
11150 /*
11151  * Check a chunk item.
11152  * Including checking all referred dev_extents and block group
11153  */
11154 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11155                             struct extent_buffer *eb, int slot)
11156 {
11157         struct btrfs_root *extent_root = fs_info->extent_root;
11158         struct btrfs_root *dev_root = fs_info->dev_root;
11159         struct btrfs_path path;
11160         struct btrfs_key chunk_key;
11161         struct btrfs_key bg_key;
11162         struct btrfs_key devext_key;
11163         struct btrfs_chunk *chunk;
11164         struct extent_buffer *leaf;
11165         struct btrfs_block_group_item *bi;
11166         struct btrfs_block_group_item bg_item;
11167         struct btrfs_dev_extent *ptr;
11168         u64 length;
11169         u64 chunk_end;
11170         u64 stripe_len;
11171         u64 type;
11172         int num_stripes;
11173         u64 offset;
11174         u64 objectid;
11175         int i;
11176         int ret;
11177         int err = 0;
11178
11179         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11180         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11181         length = btrfs_chunk_length(eb, chunk);
11182         chunk_end = chunk_key.offset + length;
11183         ret = btrfs_check_chunk_valid(extent_root, eb, chunk, slot,
11184                                       chunk_key.offset);
11185         if (ret < 0) {
11186                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11187                         chunk_end);
11188                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11189                 goto out;
11190         }
11191         type = btrfs_chunk_type(eb, chunk);
11192
11193         bg_key.objectid = chunk_key.offset;
11194         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11195         bg_key.offset = length;
11196
11197         btrfs_init_path(&path);
11198         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11199         if (ret) {
11200                 error(
11201                 "chunk[%llu %llu) did not find the related block group item",
11202                         chunk_key.offset, chunk_end);
11203                 err |= REFERENCER_MISSING;
11204         } else{
11205                 leaf = path.nodes[0];
11206                 bi = btrfs_item_ptr(leaf, path.slots[0],
11207                                     struct btrfs_block_group_item);
11208                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11209                                    sizeof(bg_item));
11210                 if (btrfs_block_group_flags(&bg_item) != type) {
11211                         error(
11212 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11213                                 chunk_key.offset, chunk_end, type,
11214                                 btrfs_block_group_flags(&bg_item));
11215                         err |= REFERENCER_MISSING;
11216                 }
11217         }
11218
11219         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11220         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11221         for (i = 0; i < num_stripes; i++) {
11222                 btrfs_release_path(&path);
11223                 btrfs_init_path(&path);
11224                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11225                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11226                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11227
11228                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11229                                         0, 0);
11230                 if (ret)
11231                         goto not_match_dev;
11232
11233                 leaf = path.nodes[0];
11234                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11235                                      struct btrfs_dev_extent);
11236                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11237                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11238                 if (objectid != chunk_key.objectid ||
11239                     offset != chunk_key.offset ||
11240                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11241                         goto not_match_dev;
11242                 continue;
11243 not_match_dev:
11244                 err |= BACKREF_MISSING;
11245                 error(
11246                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11247                         chunk_key.objectid, chunk_end, i);
11248                 continue;
11249         }
11250         btrfs_release_path(&path);
11251 out:
11252         return err;
11253 }
11254
11255 /*
11256  * Main entry function to check known items and update related accounting info
11257  */
11258 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11259 {
11260         struct btrfs_fs_info *fs_info = root->fs_info;
11261         struct btrfs_key key;
11262         int slot = 0;
11263         int type;
11264         struct btrfs_extent_data_ref *dref;
11265         int ret;
11266         int err = 0;
11267
11268 next:
11269         btrfs_item_key_to_cpu(eb, &key, slot);
11270         type = key.type;
11271
11272         switch (type) {
11273         case BTRFS_EXTENT_DATA_KEY:
11274                 ret = check_extent_data_item(root, eb, slot);
11275                 err |= ret;
11276                 break;
11277         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11278                 ret = check_block_group_item(fs_info, eb, slot);
11279                 err |= ret;
11280                 break;
11281         case BTRFS_DEV_ITEM_KEY:
11282                 ret = check_dev_item(fs_info, eb, slot);
11283                 err |= ret;
11284                 break;
11285         case BTRFS_CHUNK_ITEM_KEY:
11286                 ret = check_chunk_item(fs_info, eb, slot);
11287                 err |= ret;
11288                 break;
11289         case BTRFS_DEV_EXTENT_KEY:
11290                 ret = check_dev_extent_item(fs_info, eb, slot);
11291                 err |= ret;
11292                 break;
11293         case BTRFS_EXTENT_ITEM_KEY:
11294         case BTRFS_METADATA_ITEM_KEY:
11295                 ret = check_extent_item(fs_info, eb, slot);
11296                 err |= ret;
11297                 break;
11298         case BTRFS_EXTENT_CSUM_KEY:
11299                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11300                 break;
11301         case BTRFS_TREE_BLOCK_REF_KEY:
11302                 ret = check_tree_block_backref(fs_info, key.offset,
11303                                                key.objectid, -1);
11304                 err |= ret;
11305                 break;
11306         case BTRFS_EXTENT_DATA_REF_KEY:
11307                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11308                 ret = check_extent_data_backref(fs_info,
11309                                 btrfs_extent_data_ref_root(eb, dref),
11310                                 btrfs_extent_data_ref_objectid(eb, dref),
11311                                 btrfs_extent_data_ref_offset(eb, dref),
11312                                 key.objectid, 0,
11313                                 btrfs_extent_data_ref_count(eb, dref));
11314                 err |= ret;
11315                 break;
11316         case BTRFS_SHARED_BLOCK_REF_KEY:
11317                 ret = check_shared_block_backref(fs_info, key.offset,
11318                                                  key.objectid, -1);
11319                 err |= ret;
11320                 break;
11321         case BTRFS_SHARED_DATA_REF_KEY:
11322                 ret = check_shared_data_backref(fs_info, key.offset,
11323                                                 key.objectid);
11324                 err |= ret;
11325                 break;
11326         default:
11327                 break;
11328         }
11329
11330         if (++slot < btrfs_header_nritems(eb))
11331                 goto next;
11332
11333         return err;
11334 }
11335
11336 /*
11337  * Helper function for later fs/subvol tree check.  To determine if a tree
11338  * block should be checked.
11339  * This function will ensure only the direct referencer with lowest rootid to
11340  * check a fs/subvolume tree block.
11341  *
11342  * Backref check at extent tree would detect errors like missing subvolume
11343  * tree, so we can do aggressive check to reduce duplicated checks.
11344  */
11345 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11346 {
11347         struct btrfs_root *extent_root = root->fs_info->extent_root;
11348         struct btrfs_key key;
11349         struct btrfs_path path;
11350         struct extent_buffer *leaf;
11351         int slot;
11352         struct btrfs_extent_item *ei;
11353         unsigned long ptr;
11354         unsigned long end;
11355         int type;
11356         u32 item_size;
11357         u64 offset;
11358         struct btrfs_extent_inline_ref *iref;
11359         int ret;
11360
11361         btrfs_init_path(&path);
11362         key.objectid = btrfs_header_bytenr(eb);
11363         key.type = BTRFS_METADATA_ITEM_KEY;
11364         key.offset = (u64)-1;
11365
11366         /*
11367          * Any failure in backref resolving means we can't determine
11368          * whom the tree block belongs to.
11369          * So in that case, we need to check that tree block
11370          */
11371         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11372         if (ret < 0)
11373                 goto need_check;
11374
11375         ret = btrfs_previous_extent_item(extent_root, &path,
11376                                          btrfs_header_bytenr(eb));
11377         if (ret)
11378                 goto need_check;
11379
11380         leaf = path.nodes[0];
11381         slot = path.slots[0];
11382         btrfs_item_key_to_cpu(leaf, &key, slot);
11383         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11384
11385         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11386                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11387         } else {
11388                 struct btrfs_tree_block_info *info;
11389
11390                 info = (struct btrfs_tree_block_info *)(ei + 1);
11391                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11392         }
11393
11394         item_size = btrfs_item_size_nr(leaf, slot);
11395         ptr = (unsigned long)iref;
11396         end = (unsigned long)ei + item_size;
11397         while (ptr < end) {
11398                 iref = (struct btrfs_extent_inline_ref *)ptr;
11399                 type = btrfs_extent_inline_ref_type(leaf, iref);
11400                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11401
11402                 /*
11403                  * We only check the tree block if current root is
11404                  * the lowest referencer of it.
11405                  */
11406                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11407                     offset < root->objectid) {
11408                         btrfs_release_path(&path);
11409                         return 0;
11410                 }
11411
11412                 ptr += btrfs_extent_inline_ref_size(type);
11413         }
11414         /*
11415          * Normally we should also check keyed tree block ref, but that may be
11416          * very time consuming.  Inlined ref should already make us skip a lot
11417          * of refs now.  So skip search keyed tree block ref.
11418          */
11419
11420 need_check:
11421         btrfs_release_path(&path);
11422         return 1;
11423 }
11424
11425 /*
11426  * Traversal function for tree block. We will do:
11427  * 1) Skip shared fs/subvolume tree blocks
11428  * 2) Update related bytes accounting
11429  * 3) Pre-order traversal
11430  */
11431 static int traverse_tree_block(struct btrfs_root *root,
11432                                 struct extent_buffer *node)
11433 {
11434         struct extent_buffer *eb;
11435         struct btrfs_key key;
11436         struct btrfs_key drop_key;
11437         int level;
11438         u64 nr;
11439         int i;
11440         int err = 0;
11441         int ret;
11442
11443         /*
11444          * Skip shared fs/subvolume tree block, in that case they will
11445          * be checked by referencer with lowest rootid
11446          */
11447         if (is_fstree(root->objectid) && !should_check(root, node))
11448                 return 0;
11449
11450         /* Update bytes accounting */
11451         total_btree_bytes += node->len;
11452         if (fs_root_objectid(btrfs_header_owner(node)))
11453                 total_fs_tree_bytes += node->len;
11454         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11455                 total_extent_tree_bytes += node->len;
11456         if (!found_old_backref &&
11457             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11458             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11459             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11460                 found_old_backref = 1;
11461
11462         /* pre-order tranversal, check itself first */
11463         level = btrfs_header_level(node);
11464         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11465                                    btrfs_header_level(node),
11466                                    btrfs_header_owner(node));
11467         err |= ret;
11468         if (err)
11469                 error(
11470         "check %s failed root %llu bytenr %llu level %d, force continue check",
11471                         level ? "node":"leaf", root->objectid,
11472                         btrfs_header_bytenr(node), btrfs_header_level(node));
11473
11474         if (!level) {
11475                 btree_space_waste += btrfs_leaf_free_space(root, node);
11476                 ret = check_leaf_items(root, node);
11477                 err |= ret;
11478                 return err;
11479         }
11480
11481         nr = btrfs_header_nritems(node);
11482         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11483         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11484                 sizeof(struct btrfs_key_ptr);
11485
11486         /* Then check all its children */
11487         for (i = 0; i < nr; i++) {
11488                 u64 blocknr = btrfs_node_blockptr(node, i);
11489
11490                 btrfs_node_key_to_cpu(node, &key, i);
11491                 if (level == root->root_item.drop_level &&
11492                     is_dropped_key(&key, &drop_key))
11493                         continue;
11494
11495                 /*
11496                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11497                  * to call the function itself.
11498                  */
11499                 eb = read_tree_block(root->fs_info, blocknr,
11500                                 root->fs_info->nodesize, 0);
11501                 if (extent_buffer_uptodate(eb)) {
11502                         ret = traverse_tree_block(root, eb);
11503                         err |= ret;
11504                 }
11505                 free_extent_buffer(eb);
11506         }
11507
11508         return err;
11509 }
11510
11511 /*
11512  * Low memory usage version check_chunks_and_extents.
11513  */
11514 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11515 {
11516         struct btrfs_path path;
11517         struct btrfs_key key;
11518         struct btrfs_root *root1;
11519         struct btrfs_root *cur_root;
11520         int err = 0;
11521         int ret;
11522
11523         root1 = root->fs_info->chunk_root;
11524         ret = traverse_tree_block(root1, root1->node);
11525         err |= ret;
11526
11527         root1 = root->fs_info->tree_root;
11528         ret = traverse_tree_block(root1, root1->node);
11529         err |= ret;
11530
11531         btrfs_init_path(&path);
11532         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11533         key.offset = 0;
11534         key.type = BTRFS_ROOT_ITEM_KEY;
11535
11536         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11537         if (ret) {
11538                 error("cannot find extent treet in tree_root");
11539                 goto out;
11540         }
11541
11542         while (1) {
11543                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11544                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11545                         goto next;
11546                 key.offset = (u64)-1;
11547
11548                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11549                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11550                                         &key);
11551                 else
11552                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11553                 if (IS_ERR(cur_root) || !cur_root) {
11554                         error("failed to read tree: %lld", key.objectid);
11555                         goto next;
11556                 }
11557
11558                 ret = traverse_tree_block(cur_root, cur_root->node);
11559                 err |= ret;
11560
11561                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11562                         btrfs_free_fs_root(cur_root);
11563 next:
11564                 ret = btrfs_next_item(root1, &path);
11565                 if (ret)
11566                         goto out;
11567         }
11568
11569 out:
11570         btrfs_release_path(&path);
11571         return err;
11572 }
11573
11574 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11575                            struct btrfs_root *root, int overwrite)
11576 {
11577         struct extent_buffer *c;
11578         struct extent_buffer *old = root->node;
11579         int level;
11580         int ret;
11581         struct btrfs_disk_key disk_key = {0,0,0};
11582
11583         level = 0;
11584
11585         if (overwrite) {
11586                 c = old;
11587                 extent_buffer_get(c);
11588                 goto init;
11589         }
11590         c = btrfs_alloc_free_block(trans, root,
11591                                    root->fs_info->nodesize,
11592                                    root->root_key.objectid,
11593                                    &disk_key, level, 0, 0);
11594         if (IS_ERR(c)) {
11595                 c = old;
11596                 extent_buffer_get(c);
11597                 overwrite = 1;
11598         }
11599 init:
11600         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11601         btrfs_set_header_level(c, level);
11602         btrfs_set_header_bytenr(c, c->start);
11603         btrfs_set_header_generation(c, trans->transid);
11604         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11605         btrfs_set_header_owner(c, root->root_key.objectid);
11606
11607         write_extent_buffer(c, root->fs_info->fsid,
11608                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11609
11610         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11611                             btrfs_header_chunk_tree_uuid(c),
11612                             BTRFS_UUID_SIZE);
11613
11614         btrfs_mark_buffer_dirty(c);
11615         /*
11616          * this case can happen in the following case:
11617          *
11618          * 1.overwrite previous root.
11619          *
11620          * 2.reinit reloc data root, this is because we skip pin
11621          * down reloc data tree before which means we can allocate
11622          * same block bytenr here.
11623          */
11624         if (old->start == c->start) {
11625                 btrfs_set_root_generation(&root->root_item,
11626                                           trans->transid);
11627                 root->root_item.level = btrfs_header_level(root->node);
11628                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11629                                         &root->root_key, &root->root_item);
11630                 if (ret) {
11631                         free_extent_buffer(c);
11632                         return ret;
11633                 }
11634         }
11635         free_extent_buffer(old);
11636         root->node = c;
11637         add_root_to_dirty_list(root);
11638         return 0;
11639 }
11640
11641 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11642                                 struct extent_buffer *eb, int tree_root)
11643 {
11644         struct extent_buffer *tmp;
11645         struct btrfs_root_item *ri;
11646         struct btrfs_key key;
11647         u64 bytenr;
11648         u32 nodesize;
11649         int level = btrfs_header_level(eb);
11650         int nritems;
11651         int ret;
11652         int i;
11653
11654         /*
11655          * If we have pinned this block before, don't pin it again.
11656          * This can not only avoid forever loop with broken filesystem
11657          * but also give us some speedups.
11658          */
11659         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11660                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11661                 return 0;
11662
11663         btrfs_pin_extent(fs_info, eb->start, eb->len);
11664
11665         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11666         nritems = btrfs_header_nritems(eb);
11667         for (i = 0; i < nritems; i++) {
11668                 if (level == 0) {
11669                         btrfs_item_key_to_cpu(eb, &key, i);
11670                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11671                                 continue;
11672                         /* Skip the extent root and reloc roots */
11673                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11674                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11675                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11676                                 continue;
11677                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11678                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11679
11680                         /*
11681                          * If at any point we start needing the real root we
11682                          * will have to build a stump root for the root we are
11683                          * in, but for now this doesn't actually use the root so
11684                          * just pass in extent_root.
11685                          */
11686                         tmp = read_tree_block(fs_info, bytenr, nodesize, 0);
11687                         if (!extent_buffer_uptodate(tmp)) {
11688                                 fprintf(stderr, "Error reading root block\n");
11689                                 return -EIO;
11690                         }
11691                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11692                         free_extent_buffer(tmp);
11693                         if (ret)
11694                                 return ret;
11695                 } else {
11696                         bytenr = btrfs_node_blockptr(eb, i);
11697
11698                         /* If we aren't the tree root don't read the block */
11699                         if (level == 1 && !tree_root) {
11700                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11701                                 continue;
11702                         }
11703
11704                         tmp = read_tree_block(fs_info, bytenr,
11705                                               nodesize, 0);
11706                         if (!extent_buffer_uptodate(tmp)) {
11707                                 fprintf(stderr, "Error reading tree block\n");
11708                                 return -EIO;
11709                         }
11710                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11711                         free_extent_buffer(tmp);
11712                         if (ret)
11713                                 return ret;
11714                 }
11715         }
11716
11717         return 0;
11718 }
11719
11720 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11721 {
11722         int ret;
11723
11724         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11725         if (ret)
11726                 return ret;
11727
11728         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11729 }
11730
11731 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11732 {
11733         struct btrfs_block_group_cache *cache;
11734         struct btrfs_path path;
11735         struct extent_buffer *leaf;
11736         struct btrfs_chunk *chunk;
11737         struct btrfs_key key;
11738         int ret;
11739         u64 start;
11740
11741         btrfs_init_path(&path);
11742         key.objectid = 0;
11743         key.type = BTRFS_CHUNK_ITEM_KEY;
11744         key.offset = 0;
11745         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11746         if (ret < 0) {
11747                 btrfs_release_path(&path);
11748                 return ret;
11749         }
11750
11751         /*
11752          * We do this in case the block groups were screwed up and had alloc
11753          * bits that aren't actually set on the chunks.  This happens with
11754          * restored images every time and could happen in real life I guess.
11755          */
11756         fs_info->avail_data_alloc_bits = 0;
11757         fs_info->avail_metadata_alloc_bits = 0;
11758         fs_info->avail_system_alloc_bits = 0;
11759
11760         /* First we need to create the in-memory block groups */
11761         while (1) {
11762                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11763                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11764                         if (ret < 0) {
11765                                 btrfs_release_path(&path);
11766                                 return ret;
11767                         }
11768                         if (ret) {
11769                                 ret = 0;
11770                                 break;
11771                         }
11772                 }
11773                 leaf = path.nodes[0];
11774                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11775                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11776                         path.slots[0]++;
11777                         continue;
11778                 }
11779
11780                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11781                 btrfs_add_block_group(fs_info, 0,
11782                                       btrfs_chunk_type(leaf, chunk),
11783                                       key.objectid, key.offset,
11784                                       btrfs_chunk_length(leaf, chunk));
11785                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11786                                  key.offset + btrfs_chunk_length(leaf, chunk));
11787                 path.slots[0]++;
11788         }
11789         start = 0;
11790         while (1) {
11791                 cache = btrfs_lookup_first_block_group(fs_info, start);
11792                 if (!cache)
11793                         break;
11794                 cache->cached = 1;
11795                 start = cache->key.objectid + cache->key.offset;
11796         }
11797
11798         btrfs_release_path(&path);
11799         return 0;
11800 }
11801
11802 static int reset_balance(struct btrfs_trans_handle *trans,
11803                          struct btrfs_fs_info *fs_info)
11804 {
11805         struct btrfs_root *root = fs_info->tree_root;
11806         struct btrfs_path path;
11807         struct extent_buffer *leaf;
11808         struct btrfs_key key;
11809         int del_slot, del_nr = 0;
11810         int ret;
11811         int found = 0;
11812
11813         btrfs_init_path(&path);
11814         key.objectid = BTRFS_BALANCE_OBJECTID;
11815         key.type = BTRFS_BALANCE_ITEM_KEY;
11816         key.offset = 0;
11817         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11818         if (ret) {
11819                 if (ret > 0)
11820                         ret = 0;
11821                 if (!ret)
11822                         goto reinit_data_reloc;
11823                 else
11824                         goto out;
11825         }
11826
11827         ret = btrfs_del_item(trans, root, &path);
11828         if (ret)
11829                 goto out;
11830         btrfs_release_path(&path);
11831
11832         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11833         key.type = BTRFS_ROOT_ITEM_KEY;
11834         key.offset = 0;
11835         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11836         if (ret < 0)
11837                 goto out;
11838         while (1) {
11839                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11840                         if (!found)
11841                                 break;
11842
11843                         if (del_nr) {
11844                                 ret = btrfs_del_items(trans, root, &path,
11845                                                       del_slot, del_nr);
11846                                 del_nr = 0;
11847                                 if (ret)
11848                                         goto out;
11849                         }
11850                         key.offset++;
11851                         btrfs_release_path(&path);
11852
11853                         found = 0;
11854                         ret = btrfs_search_slot(trans, root, &key, &path,
11855                                                 -1, 1);
11856                         if (ret < 0)
11857                                 goto out;
11858                         continue;
11859                 }
11860                 found = 1;
11861                 leaf = path.nodes[0];
11862                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11863                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11864                         break;
11865                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11866                         path.slots[0]++;
11867                         continue;
11868                 }
11869                 if (!del_nr) {
11870                         del_slot = path.slots[0];
11871                         del_nr = 1;
11872                 } else {
11873                         del_nr++;
11874                 }
11875                 path.slots[0]++;
11876         }
11877
11878         if (del_nr) {
11879                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11880                 if (ret)
11881                         goto out;
11882         }
11883         btrfs_release_path(&path);
11884
11885 reinit_data_reloc:
11886         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11887         key.type = BTRFS_ROOT_ITEM_KEY;
11888         key.offset = (u64)-1;
11889         root = btrfs_read_fs_root(fs_info, &key);
11890         if (IS_ERR(root)) {
11891                 fprintf(stderr, "Error reading data reloc tree\n");
11892                 ret = PTR_ERR(root);
11893                 goto out;
11894         }
11895         record_root_in_trans(trans, root);
11896         ret = btrfs_fsck_reinit_root(trans, root, 0);
11897         if (ret)
11898                 goto out;
11899         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11900 out:
11901         btrfs_release_path(&path);
11902         return ret;
11903 }
11904
11905 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11906                               struct btrfs_fs_info *fs_info)
11907 {
11908         u64 start = 0;
11909         int ret;
11910
11911         /*
11912          * The only reason we don't do this is because right now we're just
11913          * walking the trees we find and pinning down their bytes, we don't look
11914          * at any of the leaves.  In order to do mixed groups we'd have to check
11915          * the leaves of any fs roots and pin down the bytes for any file
11916          * extents we find.  Not hard but why do it if we don't have to?
11917          */
11918         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11919                 fprintf(stderr, "We don't support re-initing the extent tree "
11920                         "for mixed block groups yet, please notify a btrfs "
11921                         "developer you want to do this so they can add this "
11922                         "functionality.\n");
11923                 return -EINVAL;
11924         }
11925
11926         /*
11927          * first we need to walk all of the trees except the extent tree and pin
11928          * down the bytes that are in use so we don't overwrite any existing
11929          * metadata.
11930          */
11931         ret = pin_metadata_blocks(fs_info);
11932         if (ret) {
11933                 fprintf(stderr, "error pinning down used bytes\n");
11934                 return ret;
11935         }
11936
11937         /*
11938          * Need to drop all the block groups since we're going to recreate all
11939          * of them again.
11940          */
11941         btrfs_free_block_groups(fs_info);
11942         ret = reset_block_groups(fs_info);
11943         if (ret) {
11944                 fprintf(stderr, "error resetting the block groups\n");
11945                 return ret;
11946         }
11947
11948         /* Ok we can allocate now, reinit the extent root */
11949         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11950         if (ret) {
11951                 fprintf(stderr, "extent root initialization failed\n");
11952                 /*
11953                  * When the transaction code is updated we should end the
11954                  * transaction, but for now progs only knows about commit so
11955                  * just return an error.
11956                  */
11957                 return ret;
11958         }
11959
11960         /*
11961          * Now we have all the in-memory block groups setup so we can make
11962          * allocations properly, and the metadata we care about is safe since we
11963          * pinned all of it above.
11964          */
11965         while (1) {
11966                 struct btrfs_block_group_cache *cache;
11967
11968                 cache = btrfs_lookup_first_block_group(fs_info, start);
11969                 if (!cache)
11970                         break;
11971                 start = cache->key.objectid + cache->key.offset;
11972                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11973                                         &cache->key, &cache->item,
11974                                         sizeof(cache->item));
11975                 if (ret) {
11976                         fprintf(stderr, "Error adding block group\n");
11977                         return ret;
11978                 }
11979                 btrfs_extent_post_op(trans, fs_info->extent_root);
11980         }
11981
11982         ret = reset_balance(trans, fs_info);
11983         if (ret)
11984                 fprintf(stderr, "error resetting the pending balance\n");
11985
11986         return ret;
11987 }
11988
11989 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11990 {
11991         struct btrfs_path path;
11992         struct btrfs_trans_handle *trans;
11993         struct btrfs_key key;
11994         int ret;
11995
11996         printf("Recowing metadata block %llu\n", eb->start);
11997         key.objectid = btrfs_header_owner(eb);
11998         key.type = BTRFS_ROOT_ITEM_KEY;
11999         key.offset = (u64)-1;
12000
12001         root = btrfs_read_fs_root(root->fs_info, &key);
12002         if (IS_ERR(root)) {
12003                 fprintf(stderr, "Couldn't find owner root %llu\n",
12004                         key.objectid);
12005                 return PTR_ERR(root);
12006         }
12007
12008         trans = btrfs_start_transaction(root, 1);
12009         if (IS_ERR(trans))
12010                 return PTR_ERR(trans);
12011
12012         btrfs_init_path(&path);
12013         path.lowest_level = btrfs_header_level(eb);
12014         if (path.lowest_level)
12015                 btrfs_node_key_to_cpu(eb, &key, 0);
12016         else
12017                 btrfs_item_key_to_cpu(eb, &key, 0);
12018
12019         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12020         btrfs_commit_transaction(trans, root);
12021         btrfs_release_path(&path);
12022         return ret;
12023 }
12024
12025 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12026 {
12027         struct btrfs_path path;
12028         struct btrfs_trans_handle *trans;
12029         struct btrfs_key key;
12030         int ret;
12031
12032         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12033                bad->key.type, bad->key.offset);
12034         key.objectid = bad->root_id;
12035         key.type = BTRFS_ROOT_ITEM_KEY;
12036         key.offset = (u64)-1;
12037
12038         root = btrfs_read_fs_root(root->fs_info, &key);
12039         if (IS_ERR(root)) {
12040                 fprintf(stderr, "Couldn't find owner root %llu\n",
12041                         key.objectid);
12042                 return PTR_ERR(root);
12043         }
12044
12045         trans = btrfs_start_transaction(root, 1);
12046         if (IS_ERR(trans))
12047                 return PTR_ERR(trans);
12048
12049         btrfs_init_path(&path);
12050         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12051         if (ret) {
12052                 if (ret > 0)
12053                         ret = 0;
12054                 goto out;
12055         }
12056         ret = btrfs_del_item(trans, root, &path);
12057 out:
12058         btrfs_commit_transaction(trans, root);
12059         btrfs_release_path(&path);
12060         return ret;
12061 }
12062
12063 static int zero_log_tree(struct btrfs_root *root)
12064 {
12065         struct btrfs_trans_handle *trans;
12066         int ret;
12067
12068         trans = btrfs_start_transaction(root, 1);
12069         if (IS_ERR(trans)) {
12070                 ret = PTR_ERR(trans);
12071                 return ret;
12072         }
12073         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12074         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12075         ret = btrfs_commit_transaction(trans, root);
12076         return ret;
12077 }
12078
12079 static int populate_csum(struct btrfs_trans_handle *trans,
12080                          struct btrfs_root *csum_root, char *buf, u64 start,
12081                          u64 len)
12082 {
12083         u64 offset = 0;
12084         u64 sectorsize;
12085         int ret = 0;
12086
12087         while (offset < len) {
12088                 sectorsize = csum_root->fs_info->sectorsize;
12089                 ret = read_extent_data(csum_root, buf, start + offset,
12090                                        &sectorsize, 0);
12091                 if (ret)
12092                         break;
12093                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12094                                             start + offset, buf, sectorsize);
12095                 if (ret)
12096                         break;
12097                 offset += sectorsize;
12098         }
12099         return ret;
12100 }
12101
12102 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12103                                       struct btrfs_root *csum_root,
12104                                       struct btrfs_root *cur_root)
12105 {
12106         struct btrfs_path path;
12107         struct btrfs_key key;
12108         struct extent_buffer *node;
12109         struct btrfs_file_extent_item *fi;
12110         char *buf = NULL;
12111         u64 start = 0;
12112         u64 len = 0;
12113         int slot = 0;
12114         int ret = 0;
12115
12116         buf = malloc(cur_root->fs_info->sectorsize);
12117         if (!buf)
12118                 return -ENOMEM;
12119
12120         btrfs_init_path(&path);
12121         key.objectid = 0;
12122         key.offset = 0;
12123         key.type = 0;
12124         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12125         if (ret < 0)
12126                 goto out;
12127         /* Iterate all regular file extents and fill its csum */
12128         while (1) {
12129                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12130
12131                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12132                         goto next;
12133                 node = path.nodes[0];
12134                 slot = path.slots[0];
12135                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12136                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12137                         goto next;
12138                 start = btrfs_file_extent_disk_bytenr(node, fi);
12139                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12140
12141                 ret = populate_csum(trans, csum_root, buf, start, len);
12142                 if (ret == -EEXIST)
12143                         ret = 0;
12144                 if (ret < 0)
12145                         goto out;
12146 next:
12147                 /*
12148                  * TODO: if next leaf is corrupted, jump to nearest next valid
12149                  * leaf.
12150                  */
12151                 ret = btrfs_next_item(cur_root, &path);
12152                 if (ret < 0)
12153                         goto out;
12154                 if (ret > 0) {
12155                         ret = 0;
12156                         goto out;
12157                 }
12158         }
12159
12160 out:
12161         btrfs_release_path(&path);
12162         free(buf);
12163         return ret;
12164 }
12165
12166 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12167                                   struct btrfs_root *csum_root)
12168 {
12169         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12170         struct btrfs_path path;
12171         struct btrfs_root *tree_root = fs_info->tree_root;
12172         struct btrfs_root *cur_root;
12173         struct extent_buffer *node;
12174         struct btrfs_key key;
12175         int slot = 0;
12176         int ret = 0;
12177
12178         btrfs_init_path(&path);
12179         key.objectid = BTRFS_FS_TREE_OBJECTID;
12180         key.offset = 0;
12181         key.type = BTRFS_ROOT_ITEM_KEY;
12182         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12183         if (ret < 0)
12184                 goto out;
12185         if (ret > 0) {
12186                 ret = -ENOENT;
12187                 goto out;
12188         }
12189
12190         while (1) {
12191                 node = path.nodes[0];
12192                 slot = path.slots[0];
12193                 btrfs_item_key_to_cpu(node, &key, slot);
12194                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12195                         goto out;
12196                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12197                         goto next;
12198                 if (!is_fstree(key.objectid))
12199                         goto next;
12200                 key.offset = (u64)-1;
12201
12202                 cur_root = btrfs_read_fs_root(fs_info, &key);
12203                 if (IS_ERR(cur_root) || !cur_root) {
12204                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12205                                 key.objectid);
12206                         goto out;
12207                 }
12208                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12209                                 cur_root);
12210                 if (ret < 0)
12211                         goto out;
12212 next:
12213                 ret = btrfs_next_item(tree_root, &path);
12214                 if (ret > 0) {
12215                         ret = 0;
12216                         goto out;
12217                 }
12218                 if (ret < 0)
12219                         goto out;
12220         }
12221
12222 out:
12223         btrfs_release_path(&path);
12224         return ret;
12225 }
12226
12227 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12228                                       struct btrfs_root *csum_root)
12229 {
12230         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12231         struct btrfs_path path;
12232         struct btrfs_extent_item *ei;
12233         struct extent_buffer *leaf;
12234         char *buf;
12235         struct btrfs_key key;
12236         int ret;
12237
12238         btrfs_init_path(&path);
12239         key.objectid = 0;
12240         key.type = BTRFS_EXTENT_ITEM_KEY;
12241         key.offset = 0;
12242         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12243         if (ret < 0) {
12244                 btrfs_release_path(&path);
12245                 return ret;
12246         }
12247
12248         buf = malloc(csum_root->fs_info->sectorsize);
12249         if (!buf) {
12250                 btrfs_release_path(&path);
12251                 return -ENOMEM;
12252         }
12253
12254         while (1) {
12255                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12256                         ret = btrfs_next_leaf(extent_root, &path);
12257                         if (ret < 0)
12258                                 break;
12259                         if (ret) {
12260                                 ret = 0;
12261                                 break;
12262                         }
12263                 }
12264                 leaf = path.nodes[0];
12265
12266                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12267                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12268                         path.slots[0]++;
12269                         continue;
12270                 }
12271
12272                 ei = btrfs_item_ptr(leaf, path.slots[0],
12273                                     struct btrfs_extent_item);
12274                 if (!(btrfs_extent_flags(leaf, ei) &
12275                       BTRFS_EXTENT_FLAG_DATA)) {
12276                         path.slots[0]++;
12277                         continue;
12278                 }
12279
12280                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12281                                     key.offset);
12282                 if (ret)
12283                         break;
12284                 path.slots[0]++;
12285         }
12286
12287         btrfs_release_path(&path);
12288         free(buf);
12289         return ret;
12290 }
12291
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree cannot be relied upon and the fs/subvolume trees have to be
 * used instead.  A non-zero @search_fs_tree selects the fs/subvolume tree
 * walk, zero selects the extent tree scan.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
12308
12309 static void free_roots_info_cache(void)
12310 {
12311         if (!roots_info_cache)
12312                 return;
12313
12314         while (!cache_tree_empty(roots_info_cache)) {
12315                 struct cache_extent *entry;
12316                 struct root_item_info *rii;
12317
12318                 entry = first_cache_extent(roots_info_cache);
12319                 if (!entry)
12320                         break;
12321                 remove_cache_extent(roots_info_cache, entry);
12322                 rii = container_of(entry, struct root_item_info, cache_extent);
12323                 free(rii);
12324         }
12325
12326         free(roots_info_cache);
12327         roots_info_cache = NULL;
12328 }
12329
12330 static int build_roots_info_cache(struct btrfs_fs_info *info)
12331 {
12332         int ret = 0;
12333         struct btrfs_key key;
12334         struct extent_buffer *leaf;
12335         struct btrfs_path path;
12336
12337         if (!roots_info_cache) {
12338                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12339                 if (!roots_info_cache)
12340                         return -ENOMEM;
12341                 cache_tree_init(roots_info_cache);
12342         }
12343
12344         btrfs_init_path(&path);
12345         key.objectid = 0;
12346         key.type = BTRFS_EXTENT_ITEM_KEY;
12347         key.offset = 0;
12348         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12349         if (ret < 0)
12350                 goto out;
12351         leaf = path.nodes[0];
12352
12353         while (1) {
12354                 struct btrfs_key found_key;
12355                 struct btrfs_extent_item *ei;
12356                 struct btrfs_extent_inline_ref *iref;
12357                 int slot = path.slots[0];
12358                 int type;
12359                 u64 flags;
12360                 u64 root_id;
12361                 u8 level;
12362                 struct cache_extent *entry;
12363                 struct root_item_info *rii;
12364
12365                 if (slot >= btrfs_header_nritems(leaf)) {
12366                         ret = btrfs_next_leaf(info->extent_root, &path);
12367                         if (ret < 0) {
12368                                 break;
12369                         } else if (ret) {
12370                                 ret = 0;
12371                                 break;
12372                         }
12373                         leaf = path.nodes[0];
12374                         slot = path.slots[0];
12375                 }
12376
12377                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12378
12379                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12380                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12381                         goto next;
12382
12383                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12384                 flags = btrfs_extent_flags(leaf, ei);
12385
12386                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12387                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12388                         goto next;
12389
12390                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12391                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12392                         level = found_key.offset;
12393                 } else {
12394                         struct btrfs_tree_block_info *binfo;
12395
12396                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12397                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12398                         level = btrfs_tree_block_level(leaf, binfo);
12399                 }
12400
12401                 /*
12402                  * For a root extent, it must be of the following type and the
12403                  * first (and only one) iref in the item.
12404                  */
12405                 type = btrfs_extent_inline_ref_type(leaf, iref);
12406                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12407                         goto next;
12408
12409                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12410                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12411                 if (!entry) {
12412                         rii = malloc(sizeof(struct root_item_info));
12413                         if (!rii) {
12414                                 ret = -ENOMEM;
12415                                 goto out;
12416                         }
12417                         rii->cache_extent.start = root_id;
12418                         rii->cache_extent.size = 1;
12419                         rii->level = (u8)-1;
12420                         entry = &rii->cache_extent;
12421                         ret = insert_cache_extent(roots_info_cache, entry);
12422                         ASSERT(ret == 0);
12423                 } else {
12424                         rii = container_of(entry, struct root_item_info,
12425                                            cache_extent);
12426                 }
12427
12428                 ASSERT(rii->cache_extent.start == root_id);
12429                 ASSERT(rii->cache_extent.size == 1);
12430
12431                 if (level > rii->level || rii->level == (u8)-1) {
12432                         rii->level = level;
12433                         rii->bytenr = found_key.objectid;
12434                         rii->gen = btrfs_extent_generation(leaf, ei);
12435                         rii->node_count = 1;
12436                 } else if (level == rii->level) {
12437                         rii->node_count++;
12438                 }
12439 next:
12440                 path.slots[0]++;
12441         }
12442
12443 out:
12444         btrfs_release_path(&path);
12445
12446         return ret;
12447 }
12448
12449 static int maybe_repair_root_item(struct btrfs_path *path,
12450                                   const struct btrfs_key *root_key,
12451                                   const int read_only_mode)
12452 {
12453         const u64 root_id = root_key->objectid;
12454         struct cache_extent *entry;
12455         struct root_item_info *rii;
12456         struct btrfs_root_item ri;
12457         unsigned long offset;
12458
12459         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12460         if (!entry) {
12461                 fprintf(stderr,
12462                         "Error: could not find extent items for root %llu\n",
12463                         root_key->objectid);
12464                 return -ENOENT;
12465         }
12466
12467         rii = container_of(entry, struct root_item_info, cache_extent);
12468         ASSERT(rii->cache_extent.start == root_id);
12469         ASSERT(rii->cache_extent.size == 1);
12470
12471         if (rii->node_count != 1) {
12472                 fprintf(stderr,
12473                         "Error: could not find btree root extent for root %llu\n",
12474                         root_id);
12475                 return -ENOENT;
12476         }
12477
12478         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12479         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12480
12481         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12482             btrfs_root_level(&ri) != rii->level ||
12483             btrfs_root_generation(&ri) != rii->gen) {
12484
12485                 /*
12486                  * If we're in repair mode but our caller told us to not update
12487                  * the root item, i.e. just check if it needs to be updated, don't
12488                  * print this message, since the caller will call us again shortly
12489                  * for the same root item without read only mode (the caller will
12490                  * open a transaction first).
12491                  */
12492                 if (!(read_only_mode && repair))
12493                         fprintf(stderr,
12494                                 "%sroot item for root %llu,"
12495                                 " current bytenr %llu, current gen %llu, current level %u,"
12496                                 " new bytenr %llu, new gen %llu, new level %u\n",
12497                                 (read_only_mode ? "" : "fixing "),
12498                                 root_id,
12499                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12500                                 btrfs_root_level(&ri),
12501                                 rii->bytenr, rii->gen, rii->level);
12502
12503                 if (btrfs_root_generation(&ri) > rii->gen) {
12504                         fprintf(stderr,
12505                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12506                                 root_id, btrfs_root_generation(&ri), rii->gen);
12507                         return -EINVAL;
12508                 }
12509
12510                 if (!read_only_mode) {
12511                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12512                         btrfs_set_root_level(&ri, rii->level);
12513                         btrfs_set_root_generation(&ri, rii->gen);
12514                         write_extent_buffer(path->nodes[0], &ri,
12515                                             offset, sizeof(ri));
12516                 }
12517
12518                 return 1;
12519         }
12520
12521         return 0;
12522 }
12523
12524 /*
12525  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12526  * caused read-only snapshots to be corrupted if they were created at a moment
12527  * when the source subvolume/snapshot had orphan items. The issue was that the
12528  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12529  * node instead of the post orphan cleanup root node.
12530  * So this function, and its callees, just detects and fixes those cases. Even
12531  * though the regression was for read-only snapshots, this function applies to
12532  * any snapshot/subvolume root.
12533  * This must be run before any other repair code - not doing it so, makes other
12534  * repair code delete or modify backrefs in the extent tree for example, which
12535  * will result in an inconsistent fs after repairing the root items.
12536  */
12537 static int repair_root_items(struct btrfs_fs_info *info)
12538 {
12539         struct btrfs_path path;
12540         struct btrfs_key key;
12541         struct extent_buffer *leaf;
12542         struct btrfs_trans_handle *trans = NULL;
12543         int ret = 0;
12544         int bad_roots = 0;
12545         int need_trans = 0;
12546
12547         btrfs_init_path(&path);
12548
12549         ret = build_roots_info_cache(info);
12550         if (ret)
12551                 goto out;
12552
12553         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12554         key.type = BTRFS_ROOT_ITEM_KEY;
12555         key.offset = 0;
12556
12557 again:
12558         /*
12559          * Avoid opening and committing transactions if a leaf doesn't have
12560          * any root items that need to be fixed, so that we avoid rotating
12561          * backup roots unnecessarily.
12562          */
12563         if (need_trans) {
12564                 trans = btrfs_start_transaction(info->tree_root, 1);
12565                 if (IS_ERR(trans)) {
12566                         ret = PTR_ERR(trans);
12567                         goto out;
12568                 }
12569         }
12570
12571         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12572                                 0, trans ? 1 : 0);
12573         if (ret < 0)
12574                 goto out;
12575         leaf = path.nodes[0];
12576
12577         while (1) {
12578                 struct btrfs_key found_key;
12579
12580                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12581                         int no_more_keys = find_next_key(&path, &key);
12582
12583                         btrfs_release_path(&path);
12584                         if (trans) {
12585                                 ret = btrfs_commit_transaction(trans,
12586                                                                info->tree_root);
12587                                 trans = NULL;
12588                                 if (ret < 0)
12589                                         goto out;
12590                         }
12591                         need_trans = 0;
12592                         if (no_more_keys)
12593                                 break;
12594                         goto again;
12595                 }
12596
12597                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12598
12599                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12600                         goto next;
12601                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12602                         goto next;
12603
12604                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12605                 if (ret < 0)
12606                         goto out;
12607                 if (ret) {
12608                         if (!trans && repair) {
12609                                 need_trans = 1;
12610                                 key = found_key;
12611                                 btrfs_release_path(&path);
12612                                 goto again;
12613                         }
12614                         bad_roots++;
12615                 }
12616 next:
12617                 path.slots[0]++;
12618         }
12619         ret = 0;
12620 out:
12621         free_roots_info_cache();
12622         btrfs_release_path(&path);
12623         if (trans)
12624                 btrfs_commit_transaction(trans, info->tree_root);
12625         if (ret < 0)
12626                 return ret;
12627
12628         return bad_roots;
12629 }
12630
12631 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12632 {
12633         struct btrfs_trans_handle *trans;
12634         struct btrfs_block_group_cache *bg_cache;
12635         u64 current = 0;
12636         int ret = 0;
12637
12638         /* Clear all free space cache inodes and its extent data */
12639         while (1) {
12640                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12641                 if (!bg_cache)
12642                         break;
12643                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12644                 if (ret < 0)
12645                         return ret;
12646                 current = bg_cache->key.objectid + bg_cache->key.offset;
12647         }
12648
12649         /* Don't forget to set cache_generation to -1 */
12650         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12651         if (IS_ERR(trans)) {
12652                 error("failed to update super block cache generation");
12653                 return PTR_ERR(trans);
12654         }
12655         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12656         btrfs_commit_transaction(trans, fs_info->tree_root);
12657
12658         return ret;
12659 }
12660
/*
 * NULL-terminated help text for "btrfs check".
 *
 * NOTE(review): the entries look like synopsis, one-line summary, long
 * description, an empty separator and then the option lines; presumably this
 * layout is what the shared help code expects - confirm against help.h.
 */
const char * const cmd_check_usage[] = {
	/* Synopsis and one-line summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* Long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",

	/* Separator */
	"",

	/* Options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	/* Terminator */
	NULL
};
12691
12692 int cmd_check(int argc, char **argv)
12693 {
12694         struct cache_tree root_cache;
12695         struct btrfs_root *root;
12696         struct btrfs_fs_info *info;
12697         u64 bytenr = 0;
12698         u64 subvolid = 0;
12699         u64 tree_root_bytenr = 0;
12700         u64 chunk_root_bytenr = 0;
12701         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12702         int ret;
12703         int err = 0;
12704         u64 num;
12705         int init_csum_tree = 0;
12706         int readonly = 0;
12707         int clear_space_cache = 0;
12708         int qgroup_report = 0;
12709         int qgroups_repaired = 0;
12710         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12711
12712         while(1) {
12713                 int c;
12714                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12715                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12716                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12717                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12718                 static const struct option long_options[] = {
12719                         { "super", required_argument, NULL, 's' },
12720                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12721                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12722                         { "init-csum-tree", no_argument, NULL,
12723                                 GETOPT_VAL_INIT_CSUM },
12724                         { "init-extent-tree", no_argument, NULL,
12725                                 GETOPT_VAL_INIT_EXTENT },
12726                         { "check-data-csum", no_argument, NULL,
12727                                 GETOPT_VAL_CHECK_CSUM },
12728                         { "backup", no_argument, NULL, 'b' },
12729                         { "subvol-extents", required_argument, NULL, 'E' },
12730                         { "qgroup-report", no_argument, NULL, 'Q' },
12731                         { "tree-root", required_argument, NULL, 'r' },
12732                         { "chunk-root", required_argument, NULL,
12733                                 GETOPT_VAL_CHUNK_TREE },
12734                         { "progress", no_argument, NULL, 'p' },
12735                         { "mode", required_argument, NULL,
12736                                 GETOPT_VAL_MODE },
12737                         { "clear-space-cache", required_argument, NULL,
12738                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12739                         { NULL, 0, NULL, 0}
12740                 };
12741
12742                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12743                 if (c < 0)
12744                         break;
12745                 switch(c) {
12746                         case 'a': /* ignored */ break;
12747                         case 'b':
12748                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12749                                 break;
12750                         case 's':
12751                                 num = arg_strtou64(optarg);
12752                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12753                                         error(
12754                                         "super mirror should be less than %d",
12755                                                 BTRFS_SUPER_MIRROR_MAX);
12756                                         exit(1);
12757                                 }
12758                                 bytenr = btrfs_sb_offset(((int)num));
12759                                 printf("using SB copy %llu, bytenr %llu\n", num,
12760                                        (unsigned long long)bytenr);
12761                                 break;
12762                         case 'Q':
12763                                 qgroup_report = 1;
12764                                 break;
12765                         case 'E':
12766                                 subvolid = arg_strtou64(optarg);
12767                                 break;
12768                         case 'r':
12769                                 tree_root_bytenr = arg_strtou64(optarg);
12770                                 break;
12771                         case GETOPT_VAL_CHUNK_TREE:
12772                                 chunk_root_bytenr = arg_strtou64(optarg);
12773                                 break;
12774                         case 'p':
12775                                 ctx.progress_enabled = true;
12776                                 break;
12777                         case '?':
12778                         case 'h':
12779                                 usage(cmd_check_usage);
12780                         case GETOPT_VAL_REPAIR:
12781                                 printf("enabling repair mode\n");
12782                                 repair = 1;
12783                                 ctree_flags |= OPEN_CTREE_WRITES;
12784                                 break;
12785                         case GETOPT_VAL_READONLY:
12786                                 readonly = 1;
12787                                 break;
12788                         case GETOPT_VAL_INIT_CSUM:
12789                                 printf("Creating a new CRC tree\n");
12790                                 init_csum_tree = 1;
12791                                 repair = 1;
12792                                 ctree_flags |= OPEN_CTREE_WRITES;
12793                                 break;
12794                         case GETOPT_VAL_INIT_EXTENT:
12795                                 init_extent_tree = 1;
12796                                 ctree_flags |= (OPEN_CTREE_WRITES |
12797                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12798                                 repair = 1;
12799                                 break;
12800                         case GETOPT_VAL_CHECK_CSUM:
12801                                 check_data_csum = 1;
12802                                 break;
12803                         case GETOPT_VAL_MODE:
12804                                 check_mode = parse_check_mode(optarg);
12805                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12806                                         error("unknown mode: %s", optarg);
12807                                         exit(1);
12808                                 }
12809                                 break;
12810                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12811                                 if (strcmp(optarg, "v1") == 0) {
12812                                         clear_space_cache = 1;
12813                                 } else if (strcmp(optarg, "v2") == 0) {
12814                                         clear_space_cache = 2;
12815                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12816                                 } else {
12817                                         error(
12818                 "invalid argument to --clear-space-cache, must be v1 or v2");
12819                                         exit(1);
12820                                 }
12821                                 ctree_flags |= OPEN_CTREE_WRITES;
12822                                 break;
12823                 }
12824         }
12825
12826         if (check_argc_exact(argc - optind, 1))
12827                 usage(cmd_check_usage);
12828
12829         if (ctx.progress_enabled) {
12830                 ctx.tp = TASK_NOTHING;
12831                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12832         }
12833
12834         /* This check is the only reason for --readonly to exist */
12835         if (readonly && repair) {
12836                 error("repair options are not compatible with --readonly");
12837                 exit(1);
12838         }
12839
12840         /*
12841          * Not supported yet
12842          */
12843         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12844                 error("low memory mode doesn't support repair yet");
12845                 exit(1);
12846         }
12847
12848         radix_tree_init();
12849         cache_tree_init(&root_cache);
12850
12851         if((ret = check_mounted(argv[optind])) < 0) {
12852                 error("could not check mount status: %s", strerror(-ret));
12853                 err |= !!ret;
12854                 goto err_out;
12855         } else if(ret) {
12856                 error("%s is currently mounted, aborting", argv[optind]);
12857                 ret = -EBUSY;
12858                 err |= !!ret;
12859                 goto err_out;
12860         }
12861
12862         /* only allow partial opening under repair mode */
12863         if (repair)
12864                 ctree_flags |= OPEN_CTREE_PARTIAL;
12865
12866         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12867                                   chunk_root_bytenr, ctree_flags);
12868         if (!info) {
12869                 error("cannot open file system");
12870                 ret = -EIO;
12871                 err |= !!ret;
12872                 goto err_out;
12873         }
12874
12875         global_info = info;
12876         root = info->fs_root;
12877         if (clear_space_cache == 1) {
12878                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12879                         error(
12880                 "free space cache v2 detected, use --clear-space-cache v2");
12881                         ret = 1;
12882                         goto close_out;
12883                 }
12884                 printf("Clearing free space cache\n");
12885                 ret = clear_free_space_cache(info);
12886                 if (ret) {
12887                         error("failed to clear free space cache");
12888                         ret = 1;
12889                 } else {
12890                         printf("Free space cache cleared\n");
12891                 }
12892                 goto close_out;
12893         } else if (clear_space_cache == 2) {
12894                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12895                         printf("no free space cache v2 to clear\n");
12896                         ret = 0;
12897                         goto close_out;
12898                 }
12899                 printf("Clear free space cache v2\n");
12900                 ret = btrfs_clear_free_space_tree(info);
12901                 if (ret) {
12902                         error("failed to clear free space cache v2: %d", ret);
12903                         ret = 1;
12904                 } else {
12905                         printf("free space cache v2 cleared\n");
12906                 }
12907                 goto close_out;
12908         }
12909
12910         /*
12911          * repair mode will force us to commit transaction which
12912          * will make us fail to load log tree when mounting.
12913          */
12914         if (repair && btrfs_super_log_root(info->super_copy)) {
12915                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12916                 if (!ret) {
12917                         ret = 1;
12918                         err |= !!ret;
12919                         goto close_out;
12920                 }
12921                 ret = zero_log_tree(root);
12922                 err |= !!ret;
12923                 if (ret) {
12924                         error("failed to zero log tree: %d", ret);
12925                         goto close_out;
12926                 }
12927         }
12928
12929         uuid_unparse(info->super_copy->fsid, uuidbuf);
12930         if (qgroup_report) {
12931                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12932                        uuidbuf);
12933                 ret = qgroup_verify_all(info);
12934                 err |= !!ret;
12935                 if (ret == 0)
12936                         report_qgroups(1);
12937                 goto close_out;
12938         }
12939         if (subvolid) {
12940                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12941                        subvolid, argv[optind], uuidbuf);
12942                 ret = print_extent_state(info, subvolid);
12943                 err |= !!ret;
12944                 goto close_out;
12945         }
12946         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12947
12948         if (!extent_buffer_uptodate(info->tree_root->node) ||
12949             !extent_buffer_uptodate(info->dev_root->node) ||
12950             !extent_buffer_uptodate(info->chunk_root->node)) {
12951                 error("critical roots corrupted, unable to check the filesystem");
12952                 err |= !!ret;
12953                 ret = -EIO;
12954                 goto close_out;
12955         }
12956
12957         if (init_extent_tree || init_csum_tree) {
12958                 struct btrfs_trans_handle *trans;
12959
12960                 trans = btrfs_start_transaction(info->extent_root, 0);
12961                 if (IS_ERR(trans)) {
12962                         error("error starting transaction");
12963                         ret = PTR_ERR(trans);
12964                         err |= !!ret;
12965                         goto close_out;
12966                 }
12967
12968                 if (init_extent_tree) {
12969                         printf("Creating a new extent tree\n");
12970                         ret = reinit_extent_tree(trans, info);
12971                         err |= !!ret;
12972                         if (ret)
12973                                 goto close_out;
12974                 }
12975
12976                 if (init_csum_tree) {
12977                         printf("Reinitialize checksum tree\n");
12978                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12979                         if (ret) {
12980                                 error("checksum tree initialization failed: %d",
12981                                                 ret);
12982                                 ret = -EIO;
12983                                 err |= !!ret;
12984                                 goto close_out;
12985                         }
12986
12987                         ret = fill_csum_tree(trans, info->csum_root,
12988                                              init_extent_tree);
12989                         err |= !!ret;
12990                         if (ret) {
12991                                 error("checksum tree refilling failed: %d", ret);
12992                                 return -EIO;
12993                         }
12994                 }
12995                 /*
12996                  * Ok now we commit and run the normal fsck, which will add
12997                  * extent entries for all of the items it finds.
12998                  */
12999                 ret = btrfs_commit_transaction(trans, info->extent_root);
13000                 err |= !!ret;
13001                 if (ret)
13002                         goto close_out;
13003         }
13004         if (!extent_buffer_uptodate(info->extent_root->node)) {
13005                 error("critical: extent_root, unable to check the filesystem");
13006                 ret = -EIO;
13007                 err |= !!ret;
13008                 goto close_out;
13009         }
13010         if (!extent_buffer_uptodate(info->csum_root->node)) {
13011                 error("critical: csum_root, unable to check the filesystem");
13012                 ret = -EIO;
13013                 err |= !!ret;
13014                 goto close_out;
13015         }
13016
13017         if (!ctx.progress_enabled)
13018                 fprintf(stderr, "checking extents\n");
13019         if (check_mode == CHECK_MODE_LOWMEM)
13020                 ret = check_chunks_and_extents_v2(root);
13021         else
13022                 ret = check_chunks_and_extents(root);
13023         err |= !!ret;
13024         if (ret)
13025                 error(
13026                 "errors found in extent allocation tree or chunk allocation");
13027
13028         ret = repair_root_items(info);
13029         err |= !!ret;
13030         if (ret < 0) {
13031                 error("failed to repair root items: %s", strerror(-ret));
13032                 goto close_out;
13033         }
13034         if (repair) {
13035                 fprintf(stderr, "Fixed %d roots.\n", ret);
13036                 ret = 0;
13037         } else if (ret > 0) {
13038                 fprintf(stderr,
13039                        "Found %d roots with an outdated root item.\n",
13040                        ret);
13041                 fprintf(stderr,
13042                         "Please run a filesystem check with the option --repair to fix them.\n");
13043                 ret = 1;
13044                 err |= !!ret;
13045                 goto close_out;
13046         }
13047
13048         if (!ctx.progress_enabled) {
13049                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13050                         fprintf(stderr, "checking free space tree\n");
13051                 else
13052                         fprintf(stderr, "checking free space cache\n");
13053         }
13054         ret = check_space_cache(root);
13055         err |= !!ret;
13056         if (ret) {
13057                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13058                         error("errors found in free space tree");
13059                 else
13060                         error("errors found in free space cache");
13061                 goto out;
13062         }
13063
13064         /*
13065          * We used to have to have these hole extents in between our real
13066          * extents so if we don't have this flag set we need to make sure there
13067          * are no gaps in the file extents for inodes, otherwise we can just
13068          * ignore it when this happens.
13069          */
13070         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13071         if (!ctx.progress_enabled)
13072                 fprintf(stderr, "checking fs roots\n");
13073         if (check_mode == CHECK_MODE_LOWMEM)
13074                 ret = check_fs_roots_v2(root->fs_info);
13075         else
13076                 ret = check_fs_roots(root, &root_cache);
13077         err |= !!ret;
13078         if (ret) {
13079                 error("errors found in fs roots");
13080                 goto out;
13081         }
13082
13083         fprintf(stderr, "checking csums\n");
13084         ret = check_csums(root);
13085         err |= !!ret;
13086         if (ret) {
13087                 error("errors found in csum tree");
13088                 goto out;
13089         }
13090
13091         fprintf(stderr, "checking root refs\n");
13092         /* For low memory mode, check_fs_roots_v2 handles root refs */
13093         if (check_mode != CHECK_MODE_LOWMEM) {
13094                 ret = check_root_refs(root, &root_cache);
13095                 err |= !!ret;
13096                 if (ret) {
13097                         error("errors found in root refs");
13098                         goto out;
13099                 }
13100         }
13101
13102         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13103                 struct extent_buffer *eb;
13104
13105                 eb = list_first_entry(&root->fs_info->recow_ebs,
13106                                       struct extent_buffer, recow);
13107                 list_del_init(&eb->recow);
13108                 ret = recow_extent_buffer(root, eb);
13109                 err |= !!ret;
13110                 if (ret) {
13111                         error("fails to fix transid errors");
13112                         break;
13113                 }
13114         }
13115
13116         while (!list_empty(&delete_items)) {
13117                 struct bad_item *bad;
13118
13119                 bad = list_first_entry(&delete_items, struct bad_item, list);
13120                 list_del_init(&bad->list);
13121                 if (repair) {
13122                         ret = delete_bad_item(root, bad);
13123                         err |= !!ret;
13124                 }
13125                 free(bad);
13126         }
13127
13128         if (info->quota_enabled) {
13129                 fprintf(stderr, "checking quota groups\n");
13130                 ret = qgroup_verify_all(info);
13131                 err |= !!ret;
13132                 if (ret) {
13133                         error("failed to check quota groups");
13134                         goto out;
13135                 }
13136                 report_qgroups(0);
13137                 ret = repair_qgroups(info, &qgroups_repaired);
13138                 err |= !!ret;
13139                 if (err) {
13140                         error("failed to repair quota groups");
13141                         goto out;
13142                 }
13143                 ret = 0;
13144         }
13145
13146         if (!list_empty(&root->fs_info->recow_ebs)) {
13147                 error("transid errors in file system");
13148                 ret = 1;
13149                 err |= !!ret;
13150         }
13151 out:
13152         if (found_old_backref) { /*
13153                  * there was a disk format change when mixed
13154                  * backref was in testing tree. The old format
13155                  * existed about one week.
13156                  */
13157                 printf("\n * Found old mixed backref format. "
13158                        "The old format is not supported! *"
13159                        "\n * Please mount the FS in readonly mode, "
13160                        "backup data and re-format the FS. *\n\n");
13161                 err |= 1;
13162         }
13163         printf("found %llu bytes used, ",
13164                (unsigned long long)bytes_used);
13165         if (err)
13166                 printf("error(s) found\n");
13167         else
13168                 printf("no error found\n");
13169         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13170         printf("total tree bytes: %llu\n",
13171                (unsigned long long)total_btree_bytes);
13172         printf("total fs tree bytes: %llu\n",
13173                (unsigned long long)total_fs_tree_bytes);
13174         printf("total extent tree bytes: %llu\n",
13175                (unsigned long long)total_extent_tree_bytes);
13176         printf("btree space waste bytes: %llu\n",
13177                (unsigned long long)btree_space_waste);
13178         printf("file data blocks allocated: %llu\n referenced %llu\n",
13179                 (unsigned long long)data_bytes_allocated,
13180                 (unsigned long long)data_bytes_referenced);
13181
13182         free_qgroup_counts();
13183         free_root_recs_tree(&root_cache);
13184 close_out:
13185         close_ctree(root);
13186 err_out:
13187         if (ctx.progress_enabled)
13188                 task_deinit(ctx.info);
13189
13190         return err;
13191 }