btrfs-progs: Fix restoring image from multi devices fs into single device
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase currently reported by the progress indicator.  The values index
 * the label table in print_status_check(), so TASK_NOTHING must stay last.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* have to be the last element */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Selectable check implementations.  CHECK_MODE_UNKNOWN is what
 * parse_check_mode() returns for a string it does not recognise.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits; each occupies its own bit so they can be OR-ed together. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not yet determined" value for
 * extent_record::flag_block_full_backref (a two-bit field).
 */
enum {
	FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Backref error bits; print_ref_error() decodes them into text. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Inode error bits; print_inode_error() decodes them into text. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about them yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * bit 4 with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable once OR-ed into a result; it now uses bit 9.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Companion of print_status_check(): emit a newline on stdout and flush
 * it.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);
	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (cur + sizeof(*di) + name_len > total ||
1516                     name_len > BTRFS_NAME_LEN) {
1517                         error = REF_ERR_NAME_TOO_LONG;
1518
1519                         if (cur + sizeof(*di) > total)
1520                                 break;
1521                         len = min_t(u32, total - cur - sizeof(*di),
1522                                     BTRFS_NAME_LEN);
1523                 } else {
1524                         len = name_len;
1525                         error = 0;
1526                 }
1527
1528                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1529
1530                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1531                         add_inode_backref(inode_cache, location.objectid,
1532                                           key->objectid, key->offset, namebuf,
1533                                           len, filetype, key->type, error);
1534                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1535                         add_inode_backref(root_cache, location.objectid,
1536                                           key->objectid, key->offset,
1537                                           namebuf, len, filetype,
1538                                           key->type, error);
1539                 } else {
1540                         fprintf(stderr, "invalid location in dir item %u\n",
1541                                 location.type);
1542                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1543                                           key->objectid, key->offset, namebuf,
1544                                           len, filetype, key->type, error);
1545                 }
1546
1547                 len = sizeof(*di) + name_len + data_len;
1548                 di = (struct btrfs_dir_item *)((char *)di + len);
1549                 cur += len;
1550         }
1551         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1552                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1553
1554         return 0;
1555 }
1556
1557 static int process_inode_ref(struct extent_buffer *eb,
1558                              int slot, struct btrfs_key *key,
1559                              struct shared_node *active_node)
1560 {
1561         u32 total;
1562         u32 cur = 0;
1563         u32 len;
1564         u32 name_len;
1565         u64 index;
1566         int error;
1567         struct cache_tree *inode_cache;
1568         struct btrfs_inode_ref *ref;
1569         char namebuf[BTRFS_NAME_LEN];
1570
1571         inode_cache = &active_node->inode_cache;
1572
1573         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1574         total = btrfs_item_size_nr(eb, slot);
1575         while (cur < total) {
1576                 name_len = btrfs_inode_ref_name_len(eb, ref);
1577                 index = btrfs_inode_ref_index(eb, ref);
1578
1579                 /* inode_ref + namelen should not cross item boundary */
1580                 if (cur + sizeof(*ref) + name_len > total ||
1581                     name_len > BTRFS_NAME_LEN) {
1582                         if (total < cur + sizeof(*ref))
1583                                 break;
1584
1585                         /* Still try to read out the remaining part */
1586                         len = min_t(u32, total - cur - sizeof(*ref),
1587                                     BTRFS_NAME_LEN);
1588                         error = REF_ERR_NAME_TOO_LONG;
1589                 } else {
1590                         len = name_len;
1591                         error = 0;
1592                 }
1593
1594                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1595                 add_inode_backref(inode_cache, key->objectid, key->offset,
1596                                   index, namebuf, len, 0, key->type, error);
1597
1598                 len = sizeof(*ref) + name_len;
1599                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1600                 cur += len;
1601         }
1602         return 0;
1603 }
1604
1605 static int process_inode_extref(struct extent_buffer *eb,
1606                                 int slot, struct btrfs_key *key,
1607                                 struct shared_node *active_node)
1608 {
1609         u32 total;
1610         u32 cur = 0;
1611         u32 len;
1612         u32 name_len;
1613         u64 index;
1614         u64 parent;
1615         int error;
1616         struct cache_tree *inode_cache;
1617         struct btrfs_inode_extref *extref;
1618         char namebuf[BTRFS_NAME_LEN];
1619
1620         inode_cache = &active_node->inode_cache;
1621
1622         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1623         total = btrfs_item_size_nr(eb, slot);
1624         while (cur < total) {
1625                 name_len = btrfs_inode_extref_name_len(eb, extref);
1626                 index = btrfs_inode_extref_index(eb, extref);
1627                 parent = btrfs_inode_extref_parent(eb, extref);
1628                 if (name_len <= BTRFS_NAME_LEN) {
1629                         len = name_len;
1630                         error = 0;
1631                 } else {
1632                         len = BTRFS_NAME_LEN;
1633                         error = REF_ERR_NAME_TOO_LONG;
1634                 }
1635                 read_extent_buffer(eb, namebuf,
1636                                    (unsigned long)(extref + 1), len);
1637                 add_inode_backref(inode_cache, key->objectid, parent,
1638                                   index, namebuf, len, 0, key->type, error);
1639
1640                 len = sizeof(*extref) + name_len;
1641                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1642                 cur += len;
1643         }
1644         return 0;
1645
1646 }
1647
1648 static int count_csum_range(struct btrfs_root *root, u64 start,
1649                             u64 len, u64 *found)
1650 {
1651         struct btrfs_key key;
1652         struct btrfs_path path;
1653         struct extent_buffer *leaf;
1654         int ret;
1655         size_t size;
1656         *found = 0;
1657         u64 csum_end;
1658         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1659
1660         btrfs_init_path(&path);
1661
1662         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1663         key.offset = start;
1664         key.type = BTRFS_EXTENT_CSUM_KEY;
1665
1666         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1667                                 &key, &path, 0, 0);
1668         if (ret < 0)
1669                 goto out;
1670         if (ret > 0 && path.slots[0] > 0) {
1671                 leaf = path.nodes[0];
1672                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1673                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1674                     key.type == BTRFS_EXTENT_CSUM_KEY)
1675                         path.slots[0]--;
1676         }
1677
1678         while (len > 0) {
1679                 leaf = path.nodes[0];
1680                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1681                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1682                         if (ret > 0)
1683                                 break;
1684                         else if (ret < 0)
1685                                 goto out;
1686                         leaf = path.nodes[0];
1687                 }
1688
1689                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1690                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1691                     key.type != BTRFS_EXTENT_CSUM_KEY)
1692                         break;
1693
1694                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1695                 if (key.offset >= start + len)
1696                         break;
1697
1698                 if (key.offset > start)
1699                         start = key.offset;
1700
1701                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1702                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1703                 if (csum_end > start) {
1704                         size = min(csum_end - start, len);
1705                         len -= size;
1706                         start += size;
1707                         *found += size;
1708                 }
1709
1710                 path.slots[0]++;
1711         }
1712 out:
1713         btrfs_release_path(&path);
1714         if (ret < 0)
1715                 return ret;
1716         return 0;
1717 }
1718
1719 static int process_file_extent(struct btrfs_root *root,
1720                                 struct extent_buffer *eb,
1721                                 int slot, struct btrfs_key *key,
1722                                 struct shared_node *active_node)
1723 {
1724         struct inode_record *rec;
1725         struct btrfs_file_extent_item *fi;
1726         u64 num_bytes = 0;
1727         u64 disk_bytenr = 0;
1728         u64 extent_offset = 0;
1729         u64 mask = root->sectorsize - 1;
1730         int extent_type;
1731         int ret;
1732
1733         rec = active_node->current;
1734         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1735         rec->found_file_extent = 1;
1736
1737         if (rec->extent_start == (u64)-1) {
1738                 rec->extent_start = key->offset;
1739                 rec->extent_end = key->offset;
1740         }
1741
1742         if (rec->extent_end > key->offset)
1743                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1744         else if (rec->extent_end < key->offset) {
1745                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1746                                            key->offset - rec->extent_end);
1747                 if (ret < 0)
1748                         return ret;
1749         }
1750
1751         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1752         extent_type = btrfs_file_extent_type(eb, fi);
1753
1754         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1755                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1756                 if (num_bytes == 0)
1757                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1758                 rec->found_size += num_bytes;
1759                 num_bytes = (num_bytes + mask) & ~mask;
1760         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1761                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1762                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1763                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1764                 extent_offset = btrfs_file_extent_offset(eb, fi);
1765                 if (num_bytes == 0 || (num_bytes & mask))
1766                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1767                 if (num_bytes + extent_offset >
1768                     btrfs_file_extent_ram_bytes(eb, fi))
1769                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1770                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1771                     (btrfs_file_extent_compression(eb, fi) ||
1772                      btrfs_file_extent_encryption(eb, fi) ||
1773                      btrfs_file_extent_other_encoding(eb, fi)))
1774                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775                 if (disk_bytenr > 0)
1776                         rec->found_size += num_bytes;
1777         } else {
1778                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1779         }
1780         rec->extent_end = key->offset + num_bytes;
1781
1782         /*
1783          * The data reloc tree will copy full extents into its inode and then
1784          * copy the corresponding csums.  Because the extent it copied could be
1785          * a preallocated extent that hasn't been written to yet there may be no
1786          * csums to copy, ergo we won't have csums for our file extent.  This is
1787          * ok so just don't bother checking csums if the inode belongs to the
1788          * data reloc tree.
1789          */
1790         if (disk_bytenr > 0 &&
1791             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1792                 u64 found;
1793                 if (btrfs_file_extent_compression(eb, fi))
1794                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1795                 else
1796                         disk_bytenr += extent_offset;
1797
1798                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1799                 if (ret < 0)
1800                         return ret;
1801                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1802                         if (found > 0)
1803                                 rec->found_csum_item = 1;
1804                         if (found < num_bytes)
1805                                 rec->some_csum_missing = 1;
1806                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1807                         if (found > 0)
1808                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1809                 }
1810         }
1811         return 0;
1812 }
1813
1814 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1815                             struct walk_control *wc)
1816 {
1817         struct btrfs_key key;
1818         u32 nritems;
1819         int i;
1820         int ret = 0;
1821         struct cache_tree *inode_cache;
1822         struct shared_node *active_node;
1823
1824         if (wc->root_level == wc->active_node &&
1825             btrfs_root_refs(&root->root_item) == 0)
1826                 return 0;
1827
1828         active_node = wc->nodes[wc->active_node];
1829         inode_cache = &active_node->inode_cache;
1830         nritems = btrfs_header_nritems(eb);
1831         for (i = 0; i < nritems; i++) {
1832                 btrfs_item_key_to_cpu(eb, &key, i);
1833
1834                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1835                         continue;
1836                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1837                         continue;
1838
1839                 if (active_node->current == NULL ||
1840                     active_node->current->ino < key.objectid) {
1841                         if (active_node->current) {
1842                                 active_node->current->checked = 1;
1843                                 maybe_free_inode_rec(inode_cache,
1844                                                      active_node->current);
1845                         }
1846                         active_node->current = get_inode_rec(inode_cache,
1847                                                              key.objectid, 1);
1848                         BUG_ON(IS_ERR(active_node->current));
1849                 }
1850                 switch (key.type) {
1851                 case BTRFS_DIR_ITEM_KEY:
1852                 case BTRFS_DIR_INDEX_KEY:
1853                         ret = process_dir_item(eb, i, &key, active_node);
1854                         break;
1855                 case BTRFS_INODE_REF_KEY:
1856                         ret = process_inode_ref(eb, i, &key, active_node);
1857                         break;
1858                 case BTRFS_INODE_EXTREF_KEY:
1859                         ret = process_inode_extref(eb, i, &key, active_node);
1860                         break;
1861                 case BTRFS_INODE_ITEM_KEY:
1862                         ret = process_inode_item(eb, i, &key, active_node);
1863                         break;
1864                 case BTRFS_EXTENT_DATA_KEY:
1865                         ret = process_file_extent(root, eb, i, &key,
1866                                                   active_node);
1867                         break;
1868                 default:
1869                         break;
1870                 };
1871         }
1872         return ret;
1873 }
1874
/*
 * Per-level cache of extent reference lookups, indexed by tree level
 * (0 .. BTRFS_MAX_LEVEL - 1).  Filled in by update_nodes_refs() and by the
 * tree walkers so the same block is not looked up repeatedly.
 */
struct node_refs {
	/* start of the tree block last looked up at each level */
	u64 bytenr[BTRFS_MAX_LEVEL];
	/* reference count found for that block */
	u64 refs[BTRFS_MAX_LEVEL];
	/* non-zero when that block should be checked from the current root */
	int need_check[BTRFS_MAX_LEVEL];
};

/* Forward declarations for helpers defined further down in this file. */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
			     struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
			    unsigned int ext_ref);
1885
1886 /*
1887  * Returns >0  Found error, not fatal, should continue
1888  * Returns <0  Fatal error, must exit the whole check
1889  * Returns 0   No errors found
1890  */
1891 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1892                                struct node_refs *nrefs, int *level, int ext_ref)
1893 {
1894         struct extent_buffer *cur = path->nodes[0];
1895         struct btrfs_key key;
1896         u64 cur_bytenr;
1897         u32 nritems;
1898         u64 first_ino = 0;
1899         int root_level = btrfs_header_level(root->node);
1900         int i;
1901         int ret = 0; /* Final return value */
1902         int err = 0; /* Positive error bitmap */
1903
1904         cur_bytenr = cur->start;
1905
1906         /* skip to first inode item or the first inode number change */
1907         nritems = btrfs_header_nritems(cur);
1908         for (i = 0; i < nritems; i++) {
1909                 btrfs_item_key_to_cpu(cur, &key, i);
1910                 if (i == 0)
1911                         first_ino = key.objectid;
1912                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1913                     (first_ino && first_ino != key.objectid))
1914                         break;
1915         }
1916         if (i == nritems) {
1917                 path->slots[0] = nritems;
1918                 return 0;
1919         }
1920         path->slots[0] = i;
1921
1922 again:
1923         err |= check_inode_item(root, path, ext_ref);
1924
1925         if (err & LAST_ITEM)
1926                 goto out;
1927
1928         /* still have inode items in thie leaf */
1929         if (cur->start == cur_bytenr)
1930                 goto again;
1931
1932         /*
1933          * we have switched to another leaf, above nodes may
1934          * have changed, here walk down the path, if a node
1935          * or leaf is shared, check whether we can skip this
1936          * node or leaf.
1937          */
1938         for (i = root_level; i >= 0; i--) {
1939                 if (path->nodes[i]->start == nrefs->bytenr[i])
1940                         continue;
1941
1942                 ret = update_nodes_refs(root,
1943                                 path->nodes[i]->start,
1944                                 nrefs, i);
1945                 if (ret)
1946                         goto out;
1947
1948                 if (!nrefs->need_check[i]) {
1949                         *level += 1;
1950                         break;
1951                 }
1952         }
1953
1954         for (i = 0; i < *level; i++) {
1955                 free_extent_buffer(path->nodes[i]);
1956                 path->nodes[i] = NULL;
1957         }
1958 out:
1959         err &= ~LAST_ITEM;
1960         if (err && !ret)
1961                 ret = err;
1962         return ret;
1963 }
1964
1965 static void reada_walk_down(struct btrfs_root *root,
1966                             struct extent_buffer *node, int slot)
1967 {
1968         u64 bytenr;
1969         u64 ptr_gen;
1970         u32 nritems;
1971         u32 blocksize;
1972         int i;
1973         int level;
1974
1975         level = btrfs_header_level(node);
1976         if (level != 1)
1977                 return;
1978
1979         nritems = btrfs_header_nritems(node);
1980         blocksize = root->nodesize;
1981         for (i = slot; i < nritems; i++) {
1982                 bytenr = btrfs_node_blockptr(node, i);
1983                 ptr_gen = btrfs_node_ptr_generation(node, i);
1984                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1985         }
1986 }
1987
1988 /*
1989  * Check the child node/leaf by the following condition:
1990  * 1. the first item key of the node/leaf should be the same with the one
1991  *    in parent.
1992  * 2. block in parent node should match the child node/leaf.
1993  * 3. generation of parent node and child's header should be consistent.
1994  *
1995  * Or the child node/leaf pointed by the key in parent is not valid.
1996  *
1997  * We hope to check leaf owner too, but since subvol may share leaves,
1998  * which makes leaf owner check not so strong, key check should be
1999  * sufficient enough for that case.
2000  */
2001 static int check_child_node(struct extent_buffer *parent, int slot,
2002                             struct extent_buffer *child)
2003 {
2004         struct btrfs_key parent_key;
2005         struct btrfs_key child_key;
2006         int ret = 0;
2007
2008         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2009         if (btrfs_header_level(child) == 0)
2010                 btrfs_item_key_to_cpu(child, &child_key, 0);
2011         else
2012                 btrfs_node_key_to_cpu(child, &child_key, 0);
2013
2014         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2015                 ret = -EINVAL;
2016                 fprintf(stderr,
2017                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2018                         parent_key.objectid, parent_key.type, parent_key.offset,
2019                         child_key.objectid, child_key.type, child_key.offset);
2020         }
2021         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2022                 ret = -EINVAL;
2023                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2024                         btrfs_node_blockptr(parent, slot),
2025                         btrfs_header_bytenr(child));
2026         }
2027         if (btrfs_node_ptr_generation(parent, slot) !=
2028             btrfs_header_generation(child)) {
2029                 ret = -EINVAL;
2030                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2031                         btrfs_header_generation(child),
2032                         btrfs_node_ptr_generation(parent, slot));
2033         }
2034         return ret;
2035 }
2036
2037 /*
2038  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2039  * in every fs or file tree check. Here we find its all root ids, and only check
2040  * it in the fs or file tree which has the smallest root id.
2041  */
2042 static int need_check(struct btrfs_root *root, struct ulist *roots)
2043 {
2044         struct rb_node *node;
2045         struct ulist_node *u;
2046
2047         if (roots->nnodes == 1)
2048                 return 1;
2049
2050         node = rb_first(&roots->root);
2051         u = rb_entry(node, struct ulist_node, rb_node);
2052         /*
2053          * current root id is not smallest, we skip it and let it be checked
2054          * in the fs or file tree who hash the smallest root id.
2055          */
2056         if (root->objectid != u->val)
2057                 return 0;
2058
2059         return 1;
2060 }
2061
2062 /*
2063  * for a tree node or leaf, we record its reference count, so later if we still
2064  * process this node or leaf, don't need to compute its reference count again.
2065  */
2066 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2067                              struct node_refs *nrefs, u64 level)
2068 {
2069         int check, ret;
2070         u64 refs;
2071         struct ulist *roots;
2072
2073         if (nrefs->bytenr[level] != bytenr) {
2074                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2075                                        level, 1, &refs, NULL);
2076                 if (ret < 0)
2077                         return ret;
2078
2079                 nrefs->bytenr[level] = bytenr;
2080                 nrefs->refs[level] = refs;
2081                 if (refs > 1) {
2082                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2083                                                    0, &roots);
2084                         if (ret)
2085                                 return -EIO;
2086
2087                         check = need_check(root, roots);
2088                         ulist_free(roots);
2089                         nrefs->need_check[level] = check;
2090                 } else {
2091                         nrefs->need_check[level] = 1;
2092                 }
2093         }
2094
2095         return 0;
2096 }
2097
2098 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2099                           struct walk_control *wc, int *level,
2100                           struct node_refs *nrefs)
2101 {
2102         enum btrfs_tree_block_status status;
2103         u64 bytenr;
2104         u64 ptr_gen;
2105         struct extent_buffer *next;
2106         struct extent_buffer *cur;
2107         u32 blocksize;
2108         int ret, err = 0;
2109         u64 refs;
2110
2111         WARN_ON(*level < 0);
2112         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2113
2114         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2115                 refs = nrefs->refs[*level];
2116                 ret = 0;
2117         } else {
2118                 ret = btrfs_lookup_extent_info(NULL, root,
2119                                        path->nodes[*level]->start,
2120                                        *level, 1, &refs, NULL);
2121                 if (ret < 0) {
2122                         err = ret;
2123                         goto out;
2124                 }
2125                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2126                 nrefs->refs[*level] = refs;
2127         }
2128
2129         if (refs > 1) {
2130                 ret = enter_shared_node(root, path->nodes[*level]->start,
2131                                         refs, wc, *level);
2132                 if (ret > 0) {
2133                         err = ret;
2134                         goto out;
2135                 }
2136         }
2137
2138         while (*level >= 0) {
2139                 WARN_ON(*level < 0);
2140                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2141                 cur = path->nodes[*level];
2142
2143                 if (btrfs_header_level(cur) != *level)
2144                         WARN_ON(1);
2145
2146                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2147                         break;
2148                 if (*level == 0) {
2149                         ret = process_one_leaf(root, cur, wc);
2150                         if (ret < 0)
2151                                 err = ret;
2152                         break;
2153                 }
2154                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2155                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2156                 blocksize = root->nodesize;
2157
2158                 if (bytenr == nrefs->bytenr[*level - 1]) {
2159                         refs = nrefs->refs[*level - 1];
2160                 } else {
2161                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2162                                         *level - 1, 1, &refs, NULL);
2163                         if (ret < 0) {
2164                                 refs = 0;
2165                         } else {
2166                                 nrefs->bytenr[*level - 1] = bytenr;
2167                                 nrefs->refs[*level - 1] = refs;
2168                         }
2169                 }
2170
2171                 if (refs > 1) {
2172                         ret = enter_shared_node(root, bytenr, refs,
2173                                                 wc, *level - 1);
2174                         if (ret > 0) {
2175                                 path->slots[*level]++;
2176                                 continue;
2177                         }
2178                 }
2179
2180                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2181                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2182                         free_extent_buffer(next);
2183                         reada_walk_down(root, cur, path->slots[*level]);
2184                         next = read_tree_block(root, bytenr, blocksize,
2185                                                ptr_gen);
2186                         if (!extent_buffer_uptodate(next)) {
2187                                 struct btrfs_key node_key;
2188
2189                                 btrfs_node_key_to_cpu(path->nodes[*level],
2190                                                       &node_key,
2191                                                       path->slots[*level]);
2192                                 btrfs_add_corrupt_extent_record(root->fs_info,
2193                                                 &node_key,
2194                                                 path->nodes[*level]->start,
2195                                                 root->nodesize, *level);
2196                                 err = -EIO;
2197                                 goto out;
2198                         }
2199                 }
2200
2201                 ret = check_child_node(cur, path->slots[*level], next);
2202                 if (ret) {
2203                         free_extent_buffer(next);
2204                         err = ret;
2205                         goto out;
2206                 }
2207
2208                 if (btrfs_is_leaf(next))
2209                         status = btrfs_check_leaf(root, NULL, next);
2210                 else
2211                         status = btrfs_check_node(root, NULL, next);
2212                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2213                         free_extent_buffer(next);
2214                         err = -EIO;
2215                         goto out;
2216                 }
2217
2218                 *level = *level - 1;
2219                 free_extent_buffer(path->nodes[*level]);
2220                 path->nodes[*level] = next;
2221                 path->slots[*level] = 0;
2222         }
2223 out:
2224         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2225         return err;
2226 }
2227
2228 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2229                             unsigned int ext_ref);
2230
2231 /*
2232  * Returns >0  Found error, should continue
2233  * Returns <0  Fatal error, must exit the whole check
2234  * Returns 0   No errors found
2235  */
2236 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2237                              int *level, struct node_refs *nrefs, int ext_ref)
2238 {
2239         enum btrfs_tree_block_status status;
2240         u64 bytenr;
2241         u64 ptr_gen;
2242         struct extent_buffer *next;
2243         struct extent_buffer *cur;
2244         u32 blocksize;
2245         int ret;
2246
2247         WARN_ON(*level < 0);
2248         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2249
2250         ret = update_nodes_refs(root, path->nodes[*level]->start,
2251                                 nrefs, *level);
2252         if (ret < 0)
2253                 return ret;
2254
2255         while (*level >= 0) {
2256                 WARN_ON(*level < 0);
2257                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2258                 cur = path->nodes[*level];
2259
2260                 if (btrfs_header_level(cur) != *level)
2261                         WARN_ON(1);
2262
2263                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2264                         break;
2265                 /* Don't forgot to check leaf/node validation */
2266                 if (*level == 0) {
2267                         ret = btrfs_check_leaf(root, NULL, cur);
2268                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2269                                 ret = -EIO;
2270                                 break;
2271                         }
2272                         ret = process_one_leaf_v2(root, path, nrefs,
2273                                                   level, ext_ref);
2274                         break;
2275                 } else {
2276                         ret = btrfs_check_node(root, NULL, cur);
2277                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2278                                 ret = -EIO;
2279                                 break;
2280                         }
2281                 }
2282                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2283                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2284                 blocksize = root->nodesize;
2285
2286                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2287                 if (ret)
2288                         break;
2289                 if (!nrefs->need_check[*level - 1]) {
2290                         path->slots[*level]++;
2291                         continue;
2292                 }
2293
2294                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2295                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2296                         free_extent_buffer(next);
2297                         reada_walk_down(root, cur, path->slots[*level]);
2298                         next = read_tree_block(root, bytenr, blocksize,
2299                                                ptr_gen);
2300                         if (!extent_buffer_uptodate(next)) {
2301                                 struct btrfs_key node_key;
2302
2303                                 btrfs_node_key_to_cpu(path->nodes[*level],
2304                                                       &node_key,
2305                                                       path->slots[*level]);
2306                                 btrfs_add_corrupt_extent_record(root->fs_info,
2307                                                 &node_key,
2308                                                 path->nodes[*level]->start,
2309                                                 root->nodesize, *level);
2310                                 ret = -EIO;
2311                                 break;
2312                         }
2313                 }
2314
2315                 ret = check_child_node(cur, path->slots[*level], next);
2316                 if (ret < 0) 
2317                         break;
2318
2319                 if (btrfs_is_leaf(next))
2320                         status = btrfs_check_leaf(root, NULL, next);
2321                 else
2322                         status = btrfs_check_node(root, NULL, next);
2323                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2324                         free_extent_buffer(next);
2325                         ret = -EIO;
2326                         break;
2327                 }
2328
2329                 *level = *level - 1;
2330                 free_extent_buffer(path->nodes[*level]);
2331                 path->nodes[*level] = next;
2332                 path->slots[*level] = 0;
2333         }
2334         return ret;
2335 }
2336
2337 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2338                         struct walk_control *wc, int *level)
2339 {
2340         int i;
2341         struct extent_buffer *leaf;
2342
2343         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2344                 leaf = path->nodes[i];
2345                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2346                         path->slots[i]++;
2347                         *level = i;
2348                         return 0;
2349                 } else {
2350                         free_extent_buffer(path->nodes[*level]);
2351                         path->nodes[*level] = NULL;
2352                         BUG_ON(*level > wc->active_node);
2353                         if (*level == wc->active_node)
2354                                 leave_shared_node(root, wc, *level);
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2362                            int *level)
2363 {
2364         int i;
2365         struct extent_buffer *leaf;
2366
2367         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2368                 leaf = path->nodes[i];
2369                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2370                         path->slots[i]++;
2371                         *level = i;
2372                         return 0;
2373                 } else {
2374                         free_extent_buffer(path->nodes[*level]);
2375                         path->nodes[*level] = NULL;
2376                         *level = i + 1;
2377                 }
2378         }
2379         return 1;
2380 }
2381
2382 static int check_root_dir(struct inode_record *rec)
2383 {
2384         struct inode_backref *backref;
2385         int ret = -1;
2386
2387         if (!rec->found_inode_item || rec->errors)
2388                 goto out;
2389         if (rec->nlink != 1 || rec->found_link != 0)
2390                 goto out;
2391         if (list_empty(&rec->backrefs))
2392                 goto out;
2393         backref = to_inode_backref(rec->backrefs.next);
2394         if (!backref->found_inode_ref)
2395                 goto out;
2396         if (backref->index != 0 || backref->namelen != 2 ||
2397             memcmp(backref->name, "..", 2))
2398                 goto out;
2399         if (backref->found_dir_index || backref->found_dir_item)
2400                 goto out;
2401         ret = 0;
2402 out:
2403         return ret;
2404 }
2405
2406 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2407                               struct btrfs_root *root, struct btrfs_path *path,
2408                               struct inode_record *rec)
2409 {
2410         struct btrfs_inode_item *ei;
2411         struct btrfs_key key;
2412         int ret;
2413
2414         key.objectid = rec->ino;
2415         key.type = BTRFS_INODE_ITEM_KEY;
2416         key.offset = (u64)-1;
2417
2418         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2419         if (ret < 0)
2420                 goto out;
2421         if (ret) {
2422                 if (!path->slots[0]) {
2423                         ret = -ENOENT;
2424                         goto out;
2425                 }
2426                 path->slots[0]--;
2427                 ret = 0;
2428         }
2429         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2430         if (key.objectid != rec->ino) {
2431                 ret = -ENOENT;
2432                 goto out;
2433         }
2434
2435         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2436                             struct btrfs_inode_item);
2437         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2438         btrfs_mark_buffer_dirty(path->nodes[0]);
2439         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2440         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2441                root->root_key.objectid);
2442 out:
2443         btrfs_release_path(path);
2444         return ret;
2445 }
2446
2447 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2448                                     struct btrfs_root *root,
2449                                     struct btrfs_path *path,
2450                                     struct inode_record *rec)
2451 {
2452         int ret;
2453
2454         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2455         btrfs_release_path(path);
2456         if (!ret)
2457                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2458         return ret;
2459 }
2460
2461 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2462                                struct btrfs_root *root,
2463                                struct btrfs_path *path,
2464                                struct inode_record *rec)
2465 {
2466         struct btrfs_inode_item *ei;
2467         struct btrfs_key key;
2468         int ret = 0;
2469
2470         key.objectid = rec->ino;
2471         key.type = BTRFS_INODE_ITEM_KEY;
2472         key.offset = 0;
2473
2474         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2475         if (ret) {
2476                 if (ret > 0)
2477                         ret = -ENOENT;
2478                 goto out;
2479         }
2480
2481         /* Since ret == 0, no need to check anything */
2482         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2483                             struct btrfs_inode_item);
2484         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2485         btrfs_mark_buffer_dirty(path->nodes[0]);
2486         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2487         printf("reset nbytes for ino %llu root %llu\n",
2488                rec->ino, root->root_key.objectid);
2489 out:
2490         btrfs_release_path(path);
2491         return ret;
2492 }
2493
2494 static int add_missing_dir_index(struct btrfs_root *root,
2495                                  struct cache_tree *inode_cache,
2496                                  struct inode_record *rec,
2497                                  struct inode_backref *backref)
2498 {
2499         struct btrfs_path path;
2500         struct btrfs_trans_handle *trans;
2501         struct btrfs_dir_item *dir_item;
2502         struct extent_buffer *leaf;
2503         struct btrfs_key key;
2504         struct btrfs_disk_key disk_key;
2505         struct inode_record *dir_rec;
2506         unsigned long name_ptr;
2507         u32 data_size = sizeof(*dir_item) + backref->namelen;
2508         int ret;
2509
2510         trans = btrfs_start_transaction(root, 1);
2511         if (IS_ERR(trans))
2512                 return PTR_ERR(trans);
2513
2514         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2515                 (unsigned long long)rec->ino);
2516
2517         btrfs_init_path(&path);
2518         key.objectid = backref->dir;
2519         key.type = BTRFS_DIR_INDEX_KEY;
2520         key.offset = backref->index;
2521         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2522         BUG_ON(ret);
2523
2524         leaf = path.nodes[0];
2525         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2526
2527         disk_key.objectid = cpu_to_le64(rec->ino);
2528         disk_key.type = BTRFS_INODE_ITEM_KEY;
2529         disk_key.offset = 0;
2530
2531         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2532         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2533         btrfs_set_dir_data_len(leaf, dir_item, 0);
2534         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2535         name_ptr = (unsigned long)(dir_item + 1);
2536         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2537         btrfs_mark_buffer_dirty(leaf);
2538         btrfs_release_path(&path);
2539         btrfs_commit_transaction(trans, root);
2540
2541         backref->found_dir_index = 1;
2542         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2543         BUG_ON(IS_ERR(dir_rec));
2544         if (!dir_rec)
2545                 return 0;
2546         dir_rec->found_size += backref->namelen;
2547         if (dir_rec->found_size == dir_rec->isize &&
2548             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2549                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2550         if (dir_rec->found_size != dir_rec->isize)
2551                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2552
2553         return 0;
2554 }
2555
2556 static int delete_dir_index(struct btrfs_root *root,
2557                             struct inode_backref *backref)
2558 {
2559         struct btrfs_trans_handle *trans;
2560         struct btrfs_dir_item *di;
2561         struct btrfs_path path;
2562         int ret = 0;
2563
2564         trans = btrfs_start_transaction(root, 1);
2565         if (IS_ERR(trans))
2566                 return PTR_ERR(trans);
2567
2568         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2569                 (unsigned long long)backref->dir,
2570                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2571                 (unsigned long long)root->objectid);
2572
2573         btrfs_init_path(&path);
2574         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2575                                     backref->name, backref->namelen,
2576                                     backref->index, -1);
2577         if (IS_ERR(di)) {
2578                 ret = PTR_ERR(di);
2579                 btrfs_release_path(&path);
2580                 btrfs_commit_transaction(trans, root);
2581                 if (ret == -ENOENT)
2582                         return 0;
2583                 return ret;
2584         }
2585
2586         if (!di)
2587                 ret = btrfs_del_item(trans, root, &path);
2588         else
2589                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2590         BUG_ON(ret);
2591         btrfs_release_path(&path);
2592         btrfs_commit_transaction(trans, root);
2593         return ret;
2594 }
2595
2596 static int create_inode_item(struct btrfs_root *root,
2597                              struct inode_record *rec,
2598                              int root_dir)
2599 {
2600         struct btrfs_trans_handle *trans;
2601         struct btrfs_inode_item inode_item;
2602         time_t now = time(NULL);
2603         int ret;
2604
2605         trans = btrfs_start_transaction(root, 1);
2606         if (IS_ERR(trans)) {
2607                 ret = PTR_ERR(trans);
2608                 return ret;
2609         }
2610
2611         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2612                 "be incomplete, please check permissions and content after "
2613                 "the fsck completes.\n", (unsigned long long)root->objectid,
2614                 (unsigned long long)rec->ino);
2615
2616         memset(&inode_item, 0, sizeof(inode_item));
2617         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2618         if (root_dir)
2619                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2620         else
2621                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2622         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2623         if (rec->found_dir_item) {
2624                 if (rec->found_file_extent)
2625                         fprintf(stderr, "root %llu inode %llu has both a dir "
2626                                 "item and extents, unsure if it is a dir or a "
2627                                 "regular file so setting it as a directory\n",
2628                                 (unsigned long long)root->objectid,
2629                                 (unsigned long long)rec->ino);
2630                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2631                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2632         } else if (!rec->found_dir_item) {
2633                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2634                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2635         }
2636         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2637         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2638         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2639         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2640         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2641         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2642         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2643         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2644
2645         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2646         BUG_ON(ret);
2647         btrfs_commit_transaction(trans, root);
2648         return 0;
2649 }
2650
2651 static int repair_inode_backrefs(struct btrfs_root *root,
2652                                  struct inode_record *rec,
2653                                  struct cache_tree *inode_cache,
2654                                  int delete)
2655 {
2656         struct inode_backref *tmp, *backref;
2657         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2658         int ret = 0;
2659         int repaired = 0;
2660
2661         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2662                 if (!delete && rec->ino == root_dirid) {
2663                         if (!rec->found_inode_item) {
2664                                 ret = create_inode_item(root, rec, 1);
2665                                 if (ret)
2666                                         break;
2667                                 repaired++;
2668                         }
2669                 }
2670
2671                 /* Index 0 for root dir's are special, don't mess with it */
2672                 if (rec->ino == root_dirid && backref->index == 0)
2673                         continue;
2674
2675                 if (delete &&
2676                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2677                      (backref->found_dir_index && backref->found_inode_ref &&
2678                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2679                         ret = delete_dir_index(root, backref);
2680                         if (ret)
2681                                 break;
2682                         repaired++;
2683                         list_del(&backref->list);
2684                         free(backref);
2685                         continue;
2686                 }
2687
2688                 if (!delete && !backref->found_dir_index &&
2689                     backref->found_dir_item && backref->found_inode_ref) {
2690                         ret = add_missing_dir_index(root, inode_cache, rec,
2691                                                     backref);
2692                         if (ret)
2693                                 break;
2694                         repaired++;
2695                         if (backref->found_dir_item &&
2696                             backref->found_dir_index) {
2697                                 if (!backref->errors &&
2698                                     backref->found_inode_ref) {
2699                                         list_del(&backref->list);
2700                                         free(backref);
2701                                         continue;
2702                                 }
2703                         }
2704                 }
2705
2706                 if (!delete && (!backref->found_dir_index &&
2707                                 !backref->found_dir_item &&
2708                                 backref->found_inode_ref)) {
2709                         struct btrfs_trans_handle *trans;
2710                         struct btrfs_key location;
2711
2712                         ret = check_dir_conflict(root, backref->name,
2713                                                  backref->namelen,
2714                                                  backref->dir,
2715                                                  backref->index);
2716                         if (ret) {
2717                                 /*
2718                                  * let nlink fixing routine to handle it,
2719                                  * which can do it better.
2720                                  */
2721                                 ret = 0;
2722                                 break;
2723                         }
2724                         location.objectid = rec->ino;
2725                         location.type = BTRFS_INODE_ITEM_KEY;
2726                         location.offset = 0;
2727
2728                         trans = btrfs_start_transaction(root, 1);
2729                         if (IS_ERR(trans)) {
2730                                 ret = PTR_ERR(trans);
2731                                 break;
2732                         }
2733                         fprintf(stderr, "adding missing dir index/item pair "
2734                                 "for inode %llu\n",
2735                                 (unsigned long long)rec->ino);
2736                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2737                                                     backref->namelen,
2738                                                     backref->dir, &location,
2739                                                     imode_to_type(rec->imode),
2740                                                     backref->index);
2741                         BUG_ON(ret);
2742                         btrfs_commit_transaction(trans, root);
2743                         repaired++;
2744                 }
2745
2746                 if (!delete && (backref->found_inode_ref &&
2747                                 backref->found_dir_index &&
2748                                 backref->found_dir_item &&
2749                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2750                                 !rec->found_inode_item)) {
2751                         ret = create_inode_item(root, rec, 0);
2752                         if (ret)
2753                                 break;
2754                         repaired++;
2755                 }
2756
2757         }
2758         return ret ? ret : repaired;
2759 }
2760
2761 /*
2762  * To determine the file type for nlink/inode_item repair
2763  *
2764  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2765  * Return -ENOENT if file type is not found.
2766  */
2767 static int find_file_type(struct inode_record *rec, u8 *type)
2768 {
2769         struct inode_backref *backref;
2770
2771         /* For inode item recovered case */
2772         if (rec->found_inode_item) {
2773                 *type = imode_to_type(rec->imode);
2774                 return 0;
2775         }
2776
2777         list_for_each_entry(backref, &rec->backrefs, list) {
2778                 if (backref->found_dir_index || backref->found_dir_item) {
2779                         *type = backref->filetype;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /*
2787  * To determine the file name for nlink repair
2788  *
2789  * Return 0 if file name is found, set name and namelen.
2790  * Return -ENOENT if file name is not found.
2791  */
2792 static int find_file_name(struct inode_record *rec,
2793                           char *name, int *namelen)
2794 {
2795         struct inode_backref *backref;
2796
2797         list_for_each_entry(backref, &rec->backrefs, list) {
2798                 if (backref->found_dir_index || backref->found_dir_item ||
2799                     backref->found_inode_ref) {
2800                         memcpy(name, backref->name, backref->namelen);
2801                         *namelen = backref->namelen;
2802                         return 0;
2803                 }
2804         }
2805         return -ENOENT;
2806 }
2807
2808 /* Reset the nlink of the inode to the correct one */
2809 static int reset_nlink(struct btrfs_trans_handle *trans,
2810                        struct btrfs_root *root,
2811                        struct btrfs_path *path,
2812                        struct inode_record *rec)
2813 {
2814         struct inode_backref *backref;
2815         struct inode_backref *tmp;
2816         struct btrfs_key key;
2817         struct btrfs_inode_item *inode_item;
2818         int ret = 0;
2819
2820         /* We don't believe this either, reset it and iterate backref */
2821         rec->found_link = 0;
2822
2823         /* Remove all backref including the valid ones */
2824         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2825                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2826                                    backref->index, backref->name,
2827                                    backref->namelen, 0);
2828                 if (ret < 0)
2829                         goto out;
2830
2831                 /* remove invalid backref, so it won't be added back */
2832                 if (!(backref->found_dir_index &&
2833                       backref->found_dir_item &&
2834                       backref->found_inode_ref)) {
2835                         list_del(&backref->list);
2836                         free(backref);
2837                 } else {
2838                         rec->found_link++;
2839                 }
2840         }
2841
2842         /* Set nlink to 0 */
2843         key.objectid = rec->ino;
2844         key.type = BTRFS_INODE_ITEM_KEY;
2845         key.offset = 0;
2846         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2847         if (ret < 0)
2848                 goto out;
2849         if (ret > 0) {
2850                 ret = -ENOENT;
2851                 goto out;
2852         }
2853         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2854                                     struct btrfs_inode_item);
2855         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2856         btrfs_mark_buffer_dirty(path->nodes[0]);
2857         btrfs_release_path(path);
2858
2859         /*
2860          * Add back valid inode_ref/dir_item/dir_index,
2861          * add_link() will handle the nlink inc, so new nlink must be correct
2862          */
2863         list_for_each_entry(backref, &rec->backrefs, list) {
2864                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2865                                      backref->name, backref->namelen,
2866                                      backref->filetype, &backref->index, 1);
2867                 if (ret < 0)
2868                         goto out;
2869         }
2870 out:
2871         btrfs_release_path(path);
2872         return ret;
2873 }
2874
2875 static int get_highest_inode(struct btrfs_trans_handle *trans,
2876                                 struct btrfs_root *root,
2877                                 struct btrfs_path *path,
2878                                 u64 *highest_ino)
2879 {
2880         struct btrfs_key key, found_key;
2881         int ret;
2882
2883         btrfs_init_path(path);
2884         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2885         key.offset = -1;
2886         key.type = BTRFS_INODE_ITEM_KEY;
2887         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2888         if (ret == 1) {
2889                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2890                                 path->slots[0] - 1);
2891                 *highest_ino = found_key.objectid;
2892                 ret = 0;
2893         }
2894         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2895                 ret = -EOVERFLOW;
2896         btrfs_release_path(path);
2897         return ret;
2898 }
2899
2900 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2901                                struct btrfs_root *root,
2902                                struct btrfs_path *path,
2903                                struct inode_record *rec)
2904 {
2905         char *dir_name = "lost+found";
2906         char namebuf[BTRFS_NAME_LEN] = {0};
2907         u64 lost_found_ino;
2908         u32 mode = 0700;
2909         u8 type = 0;
2910         int namelen = 0;
2911         int name_recovered = 0;
2912         int type_recovered = 0;
2913         int ret = 0;
2914
2915         /*
2916          * Get file name and type first before these invalid inode ref
2917          * are deleted by remove_all_invalid_backref()
2918          */
2919         name_recovered = !find_file_name(rec, namebuf, &namelen);
2920         type_recovered = !find_file_type(rec, &type);
2921
2922         if (!name_recovered) {
2923                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2924                        rec->ino, rec->ino);
2925                 namelen = count_digits(rec->ino);
2926                 sprintf(namebuf, "%llu", rec->ino);
2927                 name_recovered = 1;
2928         }
2929         if (!type_recovered) {
2930                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2931                        rec->ino);
2932                 type = BTRFS_FT_REG_FILE;
2933                 type_recovered = 1;
2934         }
2935
2936         ret = reset_nlink(trans, root, path, rec);
2937         if (ret < 0) {
2938                 fprintf(stderr,
2939                         "Failed to reset nlink for inode %llu: %s\n",
2940                         rec->ino, strerror(-ret));
2941                 goto out;
2942         }
2943
2944         if (rec->found_link == 0) {
2945                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2946                 if (ret < 0)
2947                         goto out;
2948                 lost_found_ino++;
2949                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2950                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2951                                   mode);
2952                 if (ret < 0) {
2953                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2954                                 dir_name, strerror(-ret));
2955                         goto out;
2956                 }
2957                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2958                                      namebuf, namelen, type, NULL, 1);
2959                 /*
2960                  * Add ".INO" suffix several times to handle case where
2961                  * "FILENAME.INO" is already taken by another file.
2962                  */
2963                 while (ret == -EEXIST) {
2964                         /*
2965                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2966                          */
2967                         if (namelen + count_digits(rec->ino) + 1 >
2968                             BTRFS_NAME_LEN) {
2969                                 ret = -EFBIG;
2970                                 goto out;
2971                         }
2972                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2973                                  ".%llu", rec->ino);
2974                         namelen += count_digits(rec->ino) + 1;
2975                         ret = btrfs_add_link(trans, root, rec->ino,
2976                                              lost_found_ino, namebuf,
2977                                              namelen, type, NULL, 1);
2978                 }
2979                 if (ret < 0) {
2980                         fprintf(stderr,
2981                                 "Failed to link the inode %llu to %s dir: %s\n",
2982                                 rec->ino, dir_name, strerror(-ret));
2983                         goto out;
2984                 }
2985                 /*
2986                  * Just increase the found_link, don't actually add the
2987                  * backref. This will make things easier and this inode
2988                  * record will be freed after the repair is done.
2989                  * So fsck will not report problem about this inode.
2990                  */
2991                 rec->found_link++;
2992                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2993                        namelen, namebuf, dir_name);
2994         }
2995         printf("Fixed the nlink of inode %llu\n", rec->ino);
2996 out:
2997         /*
2998          * Clear the flag anyway, or we will loop forever for the same inode
2999          * as it will not be removed from the bad inode list and the dead loop
3000          * happens.
3001          */
3002         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3003         btrfs_release_path(path);
3004         return ret;
3005 }
3006
3007 /*
3008  * Check if there is any normal(reg or prealloc) file extent for given
3009  * ino.
3010  * This is used to determine the file type when neither its dir_index/item or
3011  * inode_item exists.
3012  *
3013  * This will *NOT* report error, if any error happens, just consider it does
3014  * not have any normal file extent.
3015  */
3016 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3017 {
3018         struct btrfs_path path;
3019         struct btrfs_key key;
3020         struct btrfs_key found_key;
3021         struct btrfs_file_extent_item *fi;
3022         u8 type;
3023         int ret = 0;
3024
3025         btrfs_init_path(&path);
3026         key.objectid = ino;
3027         key.type = BTRFS_EXTENT_DATA_KEY;
3028         key.offset = 0;
3029
3030         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3031         if (ret < 0) {
3032                 ret = 0;
3033                 goto out;
3034         }
3035         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3036                 ret = btrfs_next_leaf(root, &path);
3037                 if (ret) {
3038                         ret = 0;
3039                         goto out;
3040                 }
3041         }
3042         while (1) {
3043                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3044                                       path.slots[0]);
3045                 if (found_key.objectid != ino ||
3046                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3047                         break;
3048                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3049                                     struct btrfs_file_extent_item);
3050                 type = btrfs_file_extent_type(path.nodes[0], fi);
3051                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3052                         ret = 1;
3053                         goto out;
3054                 }
3055         }
3056 out:
3057         btrfs_release_path(&path);
3058         return ret;
3059 }
3060
3061 static u32 btrfs_type_to_imode(u8 type)
3062 {
3063         static u32 imode_by_btrfs_type[] = {
3064                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3065                 [BTRFS_FT_DIR]          = S_IFDIR,
3066                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3067                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3068                 [BTRFS_FT_FIFO]         = S_IFIFO,
3069                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3070                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3071         };
3072
3073         return imode_by_btrfs_type[(type)];
3074 }
3075
3076 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3077                                 struct btrfs_root *root,
3078                                 struct btrfs_path *path,
3079                                 struct inode_record *rec)
3080 {
3081         u8 filetype;
3082         u32 mode = 0700;
3083         int type_recovered = 0;
3084         int ret = 0;
3085
3086         printf("Trying to rebuild inode:%llu\n", rec->ino);
3087
3088         type_recovered = !find_file_type(rec, &filetype);
3089
3090         /*
3091          * Try to determine inode type if type not found.
3092          *
3093          * For found regular file extent, it must be FILE.
3094          * For found dir_item/index, it must be DIR.
3095          *
3096          * For undetermined one, use FILE as fallback.
3097          *
3098          * TODO:
3099          * 1. If found backref(inode_index/item is already handled) to it,
3100          *    it must be DIR.
3101          *    Need new inode-inode ref structure to allow search for that.
3102          */
3103         if (!type_recovered) {
3104                 if (rec->found_file_extent &&
3105                     find_normal_file_extent(root, rec->ino)) {
3106                         type_recovered = 1;
3107                         filetype = BTRFS_FT_REG_FILE;
3108                 } else if (rec->found_dir_item) {
3109                         type_recovered = 1;
3110                         filetype = BTRFS_FT_DIR;
3111                 } else if (!list_empty(&rec->orphan_extents)) {
3112                         type_recovered = 1;
3113                         filetype = BTRFS_FT_REG_FILE;
3114                 } else{
3115                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3116                                rec->ino);
3117                         type_recovered = 1;
3118                         filetype = BTRFS_FT_REG_FILE;
3119                 }
3120         }
3121
3122         ret = btrfs_new_inode(trans, root, rec->ino,
3123                               mode | btrfs_type_to_imode(filetype));
3124         if (ret < 0)
3125                 goto out;
3126
3127         /*
3128          * Here inode rebuild is done, we only rebuild the inode item,
3129          * don't repair the nlink(like move to lost+found).
3130          * That is the job of nlink repair.
3131          *
3132          * We just fill the record and return
3133          */
3134         rec->found_dir_item = 1;
3135         rec->imode = mode | btrfs_type_to_imode(filetype);
3136         rec->nlink = 0;
3137         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3138         /* Ensure the inode_nlinks repair function will be called */
3139         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3140 out:
3141         return ret;
3142 }
3143
3144 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3145                                       struct btrfs_root *root,
3146                                       struct btrfs_path *path,
3147                                       struct inode_record *rec)
3148 {
3149         struct orphan_data_extent *orphan;
3150         struct orphan_data_extent *tmp;
3151         int ret = 0;
3152
3153         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3154                 /*
3155                  * Check for conflicting file extents
3156                  *
3157                  * Here we don't know whether the extents is compressed or not,
3158                  * so we can only assume it not compressed nor data offset,
3159                  * and use its disk_len as extent length.
3160                  */
3161                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3162                                        orphan->offset, orphan->disk_len, 0);
3163                 btrfs_release_path(path);
3164                 if (ret < 0)
3165                         goto out;
3166                 if (!ret) {
3167                         fprintf(stderr,
3168                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3169                                 orphan->disk_bytenr, orphan->disk_len);
3170                         ret = btrfs_free_extent(trans,
3171                                         root->fs_info->extent_root,
3172                                         orphan->disk_bytenr, orphan->disk_len,
3173                                         0, root->objectid, orphan->objectid,
3174                                         orphan->offset);
3175                         if (ret < 0)
3176                                 goto out;
3177                 }
3178                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3179                                 orphan->offset, orphan->disk_bytenr,
3180                                 orphan->disk_len, orphan->disk_len);
3181                 if (ret < 0)
3182                         goto out;
3183
3184                 /* Update file size info */
3185                 rec->found_size += orphan->disk_len;
3186                 if (rec->found_size == rec->nbytes)
3187                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3188
3189                 /* Update the file extent hole info too */
3190                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3191                                            orphan->disk_len);
3192                 if (ret < 0)
3193                         goto out;
3194                 if (RB_EMPTY_ROOT(&rec->holes))
3195                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3196
3197                 list_del(&orphan->list);
3198                 free(orphan);
3199         }
3200         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3201 out:
3202         return ret;
3203 }
3204
3205 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3206                                         struct btrfs_root *root,
3207                                         struct btrfs_path *path,
3208                                         struct inode_record *rec)
3209 {
3210         struct rb_node *node;
3211         struct file_extent_hole *hole;
3212         int found = 0;
3213         int ret = 0;
3214
3215         node = rb_first(&rec->holes);
3216
3217         while (node) {
3218                 found = 1;
3219                 hole = rb_entry(node, struct file_extent_hole, node);
3220                 ret = btrfs_punch_hole(trans, root, rec->ino,
3221                                        hole->start, hole->len);
3222                 if (ret < 0)
3223                         goto out;
3224                 ret = del_file_extent_hole(&rec->holes, hole->start,
3225                                            hole->len);
3226                 if (ret < 0)
3227                         goto out;
3228                 if (RB_EMPTY_ROOT(&rec->holes))
3229                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3230                 node = rb_first(&rec->holes);
3231         }
3232         /* special case for a file losing all its file extent */
3233         if (!found) {
3234                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3235                                        round_up(rec->isize, root->sectorsize));
3236                 if (ret < 0)
3237                         goto out;
3238         }
3239         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3240                rec->ino, root->objectid);
3241 out:
3242         return ret;
3243 }
3244
3245 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3246 {
3247         struct btrfs_trans_handle *trans;
3248         struct btrfs_path path;
3249         int ret = 0;
3250
3251         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3252                              I_ERR_NO_ORPHAN_ITEM |
3253                              I_ERR_LINK_COUNT_WRONG |
3254                              I_ERR_NO_INODE_ITEM |
3255                              I_ERR_FILE_EXTENT_ORPHAN |
3256                              I_ERR_FILE_EXTENT_DISCOUNT|
3257                              I_ERR_FILE_NBYTES_WRONG)))
3258                 return rec->errors;
3259
3260         /*
3261          * For nlink repair, it may create a dir and add link, so
3262          * 2 for parent(256)'s dir_index and dir_item
3263          * 2 for lost+found dir's inode_item and inode_ref
3264          * 1 for the new inode_ref of the file
3265          * 2 for lost+found dir's dir_index and dir_item for the file
3266          */
3267         trans = btrfs_start_transaction(root, 7);
3268         if (IS_ERR(trans))
3269                 return PTR_ERR(trans);
3270
3271         btrfs_init_path(&path);
3272         if (rec->errors & I_ERR_NO_INODE_ITEM)
3273                 ret = repair_inode_no_item(trans, root, &path, rec);
3274         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3275                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3276         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3277                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3278         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3279                 ret = repair_inode_isize(trans, root, &path, rec);
3280         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3281                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3282         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3283                 ret = repair_inode_nlinks(trans, root, &path, rec);
3284         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3285                 ret = repair_inode_nbytes(trans, root, &path, rec);
3286         btrfs_commit_transaction(trans, root);
3287         btrfs_release_path(&path);
3288         return ret;
3289 }
3290
3291 static int check_inode_recs(struct btrfs_root *root,
3292                             struct cache_tree *inode_cache)
3293 {
3294         struct cache_extent *cache;
3295         struct ptr_node *node;
3296         struct inode_record *rec;
3297         struct inode_backref *backref;
3298         int stage = 0;
3299         int ret = 0;
3300         int err = 0;
3301         u64 error = 0;
3302         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3303
3304         if (btrfs_root_refs(&root->root_item) == 0) {
3305                 if (!cache_tree_empty(inode_cache))
3306                         fprintf(stderr, "warning line %d\n", __LINE__);
3307                 return 0;
3308         }
3309
3310         /*
3311          * We need to repair backrefs first because we could change some of the
3312          * errors in the inode recs.
3313          *
3314          * We also need to go through and delete invalid backrefs first and then
3315          * add the correct ones second.  We do this because we may get EEXIST
3316          * when adding back the correct index because we hadn't yet deleted the
3317          * invalid index.
3318          *
3319          * For example, if we were missing a dir index then the directories
3320          * isize would be wrong, so if we fixed the isize to what we thought it
3321          * would be and then fixed the backref we'd still have a invalid fs, so
3322          * we need to add back the dir index and then check to see if the isize
3323          * is still wrong.
3324          */
3325         while (stage < 3) {
3326                 stage++;
3327                 if (stage == 3 && !err)
3328                         break;
3329
3330                 cache = search_cache_extent(inode_cache, 0);
3331                 while (repair && cache) {
3332                         node = container_of(cache, struct ptr_node, cache);
3333                         rec = node->data;
3334                         cache = next_cache_extent(cache);
3335
3336                         /* Need to free everything up and rescan */
3337                         if (stage == 3) {
3338                                 remove_cache_extent(inode_cache, &node->cache);
3339                                 free(node);
3340                                 free_inode_rec(rec);
3341                                 continue;
3342                         }
3343
3344                         if (list_empty(&rec->backrefs))
3345                                 continue;
3346
3347                         ret = repair_inode_backrefs(root, rec, inode_cache,
3348                                                     stage == 1);
3349                         if (ret < 0) {
3350                                 err = ret;
3351                                 stage = 2;
3352                                 break;
3353                         } if (ret > 0) {
3354                                 err = -EAGAIN;
3355                         }
3356                 }
3357         }
3358         if (err)
3359                 return err;
3360
3361         rec = get_inode_rec(inode_cache, root_dirid, 0);
3362         BUG_ON(IS_ERR(rec));
3363         if (rec) {
3364                 ret = check_root_dir(rec);
3365                 if (ret) {
3366                         fprintf(stderr, "root %llu root dir %llu error\n",
3367                                 (unsigned long long)root->root_key.objectid,
3368                                 (unsigned long long)root_dirid);
3369                         print_inode_error(root, rec);
3370                         error++;
3371                 }
3372         } else {
3373                 if (repair) {
3374                         struct btrfs_trans_handle *trans;
3375
3376                         trans = btrfs_start_transaction(root, 1);
3377                         if (IS_ERR(trans)) {
3378                                 err = PTR_ERR(trans);
3379                                 return err;
3380                         }
3381
3382                         fprintf(stderr,
3383                                 "root %llu missing its root dir, recreating\n",
3384                                 (unsigned long long)root->objectid);
3385
3386                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3387                         BUG_ON(ret);
3388
3389                         btrfs_commit_transaction(trans, root);
3390                         return -EAGAIN;
3391                 }
3392
3393                 fprintf(stderr, "root %llu root dir %llu not found\n",
3394                         (unsigned long long)root->root_key.objectid,
3395                         (unsigned long long)root_dirid);
3396         }
3397
3398         while (1) {
3399                 cache = search_cache_extent(inode_cache, 0);
3400                 if (!cache)
3401                         break;
3402                 node = container_of(cache, struct ptr_node, cache);
3403                 rec = node->data;
3404                 remove_cache_extent(inode_cache, &node->cache);
3405                 free(node);
3406                 if (rec->ino == root_dirid ||
3407                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3408                         free_inode_rec(rec);
3409                         continue;
3410                 }
3411
3412                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3413                         ret = check_orphan_item(root, rec->ino);
3414                         if (ret == 0)
3415                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3416                         if (can_free_inode_rec(rec)) {
3417                                 free_inode_rec(rec);
3418                                 continue;
3419                         }
3420                 }
3421
3422                 if (!rec->found_inode_item)
3423                         rec->errors |= I_ERR_NO_INODE_ITEM;
3424                 if (rec->found_link != rec->nlink)
3425                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3426                 if (repair) {
3427                         ret = try_repair_inode(root, rec);
3428                         if (ret == 0 && can_free_inode_rec(rec)) {
3429                                 free_inode_rec(rec);
3430                                 continue;
3431                         }
3432                         ret = 0;
3433                 }
3434
3435                 if (!(repair && ret == 0))
3436                         error++;
3437                 print_inode_error(root, rec);
3438                 list_for_each_entry(backref, &rec->backrefs, list) {
3439                         if (!backref->found_dir_item)
3440                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3441                         if (!backref->found_dir_index)
3442                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3443                         if (!backref->found_inode_ref)
3444                                 backref->errors |= REF_ERR_NO_INODE_REF;
3445                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3446                                 " namelen %u name %s filetype %d errors %x",
3447                                 (unsigned long long)backref->dir,
3448                                 (unsigned long long)backref->index,
3449                                 backref->namelen, backref->name,
3450                                 backref->filetype, backref->errors);
3451                         print_ref_error(backref->errors);
3452                 }
3453                 free_inode_rec(rec);
3454         }
3455         return (error > 0) ? -1 : 0;
3456 }
3457
3458 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3459                                         u64 objectid)
3460 {
3461         struct cache_extent *cache;
3462         struct root_record *rec = NULL;
3463         int ret;
3464
3465         cache = lookup_cache_extent(root_cache, objectid, 1);
3466         if (cache) {
3467                 rec = container_of(cache, struct root_record, cache);
3468         } else {
3469                 rec = calloc(1, sizeof(*rec));
3470                 if (!rec)
3471                         return ERR_PTR(-ENOMEM);
3472                 rec->objectid = objectid;
3473                 INIT_LIST_HEAD(&rec->backrefs);
3474                 rec->cache.start = objectid;
3475                 rec->cache.size = 1;
3476
3477                 ret = insert_cache_extent(root_cache, &rec->cache);
3478                 if (ret)
3479                         return ERR_PTR(-EEXIST);
3480         }
3481         return rec;
3482 }
3483
3484 static struct root_backref *get_root_backref(struct root_record *rec,
3485                                              u64 ref_root, u64 dir, u64 index,
3486                                              const char *name, int namelen)
3487 {
3488         struct root_backref *backref;
3489
3490         list_for_each_entry(backref, &rec->backrefs, list) {
3491                 if (backref->ref_root != ref_root || backref->dir != dir ||
3492                     backref->namelen != namelen)
3493                         continue;
3494                 if (memcmp(name, backref->name, namelen))
3495                         continue;
3496                 return backref;
3497         }
3498
3499         backref = calloc(1, sizeof(*backref) + namelen + 1);
3500         if (!backref)
3501                 return NULL;
3502         backref->ref_root = ref_root;
3503         backref->dir = dir;
3504         backref->index = index;
3505         backref->namelen = namelen;
3506         memcpy(backref->name, name, namelen);
3507         backref->name[namelen] = '\0';
3508         list_add_tail(&backref->list, &rec->backrefs);
3509         return backref;
3510 }
3511
3512 static void free_root_record(struct cache_extent *cache)
3513 {
3514         struct root_record *rec;
3515         struct root_backref *backref;
3516
3517         rec = container_of(cache, struct root_record, cache);
3518         while (!list_empty(&rec->backrefs)) {
3519                 backref = to_root_backref(rec->backrefs.next);
3520                 list_del(&backref->list);
3521                 free(backref);
3522         }
3523
3524         free(rec);
3525 }
3526
3527 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3528
3529 static int add_root_backref(struct cache_tree *root_cache,
3530                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3531                             const char *name, int namelen,
3532                             int item_type, int errors)
3533 {
3534         struct root_record *rec;
3535         struct root_backref *backref;
3536
3537         rec = get_root_rec(root_cache, root_id);
3538         BUG_ON(IS_ERR(rec));
3539         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3540         BUG_ON(!backref);
3541
3542         backref->errors |= errors;
3543
3544         if (item_type != BTRFS_DIR_ITEM_KEY) {
3545                 if (backref->found_dir_index || backref->found_back_ref ||
3546                     backref->found_forward_ref) {
3547                         if (backref->index != index)
3548                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3549                 } else {
3550                         backref->index = index;
3551                 }
3552         }
3553
3554         if (item_type == BTRFS_DIR_ITEM_KEY) {
3555                 if (backref->found_forward_ref)
3556                         rec->found_ref++;
3557                 backref->found_dir_item = 1;
3558         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3559                 backref->found_dir_index = 1;
3560         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3561                 if (backref->found_forward_ref)
3562                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3563                 else if (backref->found_dir_item)
3564                         rec->found_ref++;
3565                 backref->found_forward_ref = 1;
3566         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3567                 if (backref->found_back_ref)
3568                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3569                 backref->found_back_ref = 1;
3570         } else {
3571                 BUG_ON(1);
3572         }
3573
3574         if (backref->found_forward_ref && backref->found_dir_item)
3575                 backref->reachable = 1;
3576         return 0;
3577 }
3578
3579 static int merge_root_recs(struct btrfs_root *root,
3580                            struct cache_tree *src_cache,
3581                            struct cache_tree *dst_cache)
3582 {
3583         struct cache_extent *cache;
3584         struct ptr_node *node;
3585         struct inode_record *rec;
3586         struct inode_backref *backref;
3587         int ret = 0;
3588
3589         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3590                 free_inode_recs_tree(src_cache);
3591                 return 0;
3592         }
3593
3594         while (1) {
3595                 cache = search_cache_extent(src_cache, 0);
3596                 if (!cache)
3597                         break;
3598                 node = container_of(cache, struct ptr_node, cache);
3599                 rec = node->data;
3600                 remove_cache_extent(src_cache, &node->cache);
3601                 free(node);
3602
3603                 ret = is_child_root(root, root->objectid, rec->ino);
3604                 if (ret < 0)
3605                         break;
3606                 else if (ret == 0)
3607                         goto skip;
3608
3609                 list_for_each_entry(backref, &rec->backrefs, list) {
3610                         BUG_ON(backref->found_inode_ref);
3611                         if (backref->found_dir_item)
3612                                 add_root_backref(dst_cache, rec->ino,
3613                                         root->root_key.objectid, backref->dir,
3614                                         backref->index, backref->name,
3615                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3616                                         backref->errors);
3617                         if (backref->found_dir_index)
3618                                 add_root_backref(dst_cache, rec->ino,
3619                                         root->root_key.objectid, backref->dir,
3620                                         backref->index, backref->name,
3621                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3622                                         backref->errors);
3623                 }
3624 skip:
3625                 free_inode_rec(rec);
3626         }
3627         if (ret < 0)
3628                 return ret;
3629         return 0;
3630 }
3631
3632 static int check_root_refs(struct btrfs_root *root,
3633                            struct cache_tree *root_cache)
3634 {
3635         struct root_record *rec;
3636         struct root_record *ref_root;
3637         struct root_backref *backref;
3638         struct cache_extent *cache;
3639         int loop = 1;
3640         int ret;
3641         int error;
3642         int errors = 0;
3643
3644         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3645         BUG_ON(IS_ERR(rec));
3646         rec->found_ref = 1;
3647
3648         /* fixme: this can not detect circular references */
3649         while (loop) {
3650                 loop = 0;
3651                 cache = search_cache_extent(root_cache, 0);
3652                 while (1) {
3653                         if (!cache)
3654                                 break;
3655                         rec = container_of(cache, struct root_record, cache);
3656                         cache = next_cache_extent(cache);
3657
3658                         if (rec->found_ref == 0)
3659                                 continue;
3660
3661                         list_for_each_entry(backref, &rec->backrefs, list) {
3662                                 if (!backref->reachable)
3663                                         continue;
3664
3665                                 ref_root = get_root_rec(root_cache,
3666                                                         backref->ref_root);
3667                                 BUG_ON(IS_ERR(ref_root));
3668                                 if (ref_root->found_ref > 0)
3669                                         continue;
3670
3671                                 backref->reachable = 0;
3672                                 rec->found_ref--;
3673                                 if (rec->found_ref == 0)
3674                                         loop = 1;
3675                         }
3676                 }
3677         }
3678
3679         cache = search_cache_extent(root_cache, 0);
3680         while (1) {
3681                 if (!cache)
3682                         break;
3683                 rec = container_of(cache, struct root_record, cache);
3684                 cache = next_cache_extent(cache);
3685
3686                 if (rec->found_ref == 0 &&
3687                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3688                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3689                         ret = check_orphan_item(root->fs_info->tree_root,
3690                                                 rec->objectid);
3691                         if (ret == 0)
3692                                 continue;
3693
3694                         /*
3695                          * If we don't have a root item then we likely just have
3696                          * a dir item in a snapshot for this root but no actual
3697                          * ref key or anything so it's meaningless.
3698                          */
3699                         if (!rec->found_root_item)
3700                                 continue;
3701                         errors++;
3702                         fprintf(stderr, "fs tree %llu not referenced\n",
3703                                 (unsigned long long)rec->objectid);
3704                 }
3705
3706                 error = 0;
3707                 if (rec->found_ref > 0 && !rec->found_root_item)
3708                         error = 1;
3709                 list_for_each_entry(backref, &rec->backrefs, list) {
3710                         if (!backref->found_dir_item)
3711                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3712                         if (!backref->found_dir_index)
3713                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3714                         if (!backref->found_back_ref)
3715                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3716                         if (!backref->found_forward_ref)
3717                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3718                         if (backref->reachable && backref->errors)
3719                                 error = 1;
3720                 }
3721                 if (!error)
3722                         continue;
3723
3724                 errors++;
3725                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3726                         (unsigned long long)rec->objectid, rec->found_ref,
3727                          rec->found_root_item ? "" : "not found");
3728
3729                 list_for_each_entry(backref, &rec->backrefs, list) {
3730                         if (!backref->reachable)
3731                                 continue;
3732                         if (!backref->errors && rec->found_root_item)
3733                                 continue;
3734                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3735                                 " index %llu namelen %u name %s errors %x\n",
3736                                 (unsigned long long)backref->ref_root,
3737                                 (unsigned long long)backref->dir,
3738                                 (unsigned long long)backref->index,
3739                                 backref->namelen, backref->name,
3740                                 backref->errors);
3741                         print_ref_error(backref->errors);
3742                 }
3743         }
3744         return errors > 0 ? 1 : 0;
3745 }
3746
3747 static int process_root_ref(struct extent_buffer *eb, int slot,
3748                             struct btrfs_key *key,
3749                             struct cache_tree *root_cache)
3750 {
3751         u64 dirid;
3752         u64 index;
3753         u32 len;
3754         u32 name_len;
3755         struct btrfs_root_ref *ref;
3756         char namebuf[BTRFS_NAME_LEN];
3757         int error;
3758
3759         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3760
3761         dirid = btrfs_root_ref_dirid(eb, ref);
3762         index = btrfs_root_ref_sequence(eb, ref);
3763         name_len = btrfs_root_ref_name_len(eb, ref);
3764
3765         if (name_len <= BTRFS_NAME_LEN) {
3766                 len = name_len;
3767                 error = 0;
3768         } else {
3769                 len = BTRFS_NAME_LEN;
3770                 error = REF_ERR_NAME_TOO_LONG;
3771         }
3772         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3773
3774         if (key->type == BTRFS_ROOT_REF_KEY) {
3775                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3776                                  index, namebuf, len, key->type, error);
3777         } else {
3778                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3779                                  index, namebuf, len, key->type, error);
3780         }
3781         return 0;
3782 }
3783
3784 static void free_corrupt_block(struct cache_extent *cache)
3785 {
3786         struct btrfs_corrupt_block *corrupt;
3787
3788         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3789         free(corrupt);
3790 }
3791
3792 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3793
3794 /*
3795  * Repair the btree of the given root.
3796  *
3797  * The fix is to remove the node key in corrupt_blocks cache_tree.
3798  * and rebalance the tree.
3799  * After the fix, the btree should be writeable.
3800  */
3801 static int repair_btree(struct btrfs_root *root,
3802                         struct cache_tree *corrupt_blocks)
3803 {
3804         struct btrfs_trans_handle *trans;
3805         struct btrfs_path path;
3806         struct btrfs_corrupt_block *corrupt;
3807         struct cache_extent *cache;
3808         struct btrfs_key key;
3809         u64 offset;
3810         int level;
3811         int ret = 0;
3812
3813         if (cache_tree_empty(corrupt_blocks))
3814                 return 0;
3815
3816         trans = btrfs_start_transaction(root, 1);
3817         if (IS_ERR(trans)) {
3818                 ret = PTR_ERR(trans);
3819                 fprintf(stderr, "Error starting transaction: %s\n",
3820                         strerror(-ret));
3821                 return ret;
3822         }
3823         btrfs_init_path(&path);
3824         cache = first_cache_extent(corrupt_blocks);
3825         while (cache) {
3826                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3827                                        cache);
3828                 level = corrupt->level;
3829                 path.lowest_level = level;
3830                 key.objectid = corrupt->key.objectid;
3831                 key.type = corrupt->key.type;
3832                 key.offset = corrupt->key.offset;
3833
3834                 /*
3835                  * Here we don't want to do any tree balance, since it may
3836                  * cause a balance with corrupted brother leaf/node,
3837                  * so ins_len set to 0 here.
3838                  * Balance will be done after all corrupt node/leaf is deleted.
3839                  */
3840                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3841                 if (ret < 0)
3842                         goto out;
3843                 offset = btrfs_node_blockptr(path.nodes[level],
3844                                              path.slots[level]);
3845
3846                 /* Remove the ptr */
3847                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3848                 if (ret < 0)
3849                         goto out;
3850                 /*
3851                  * Remove the corresponding extent
3852                  * return value is not concerned.
3853                  */
3854                 btrfs_release_path(&path);
3855                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3856                                         0, root->root_key.objectid,
3857                                         level - 1, 0);
3858                 cache = next_cache_extent(cache);
3859         }
3860
3861         /* Balance the btree using btrfs_search_slot() */
3862         cache = first_cache_extent(corrupt_blocks);
3863         while (cache) {
3864                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3865                                        cache);
3866                 memcpy(&key, &corrupt->key, sizeof(key));
3867                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3868                 if (ret < 0)
3869                         goto out;
3870                 /* return will always >0 since it won't find the item */
3871                 ret = 0;
3872                 btrfs_release_path(&path);
3873                 cache = next_cache_extent(cache);
3874         }
3875 out:
3876         btrfs_commit_transaction(trans, root);
3877         btrfs_release_path(&path);
3878         return ret;
3879 }
3880
3881 static int check_fs_root(struct btrfs_root *root,
3882                          struct cache_tree *root_cache,
3883                          struct walk_control *wc)
3884 {
3885         int ret = 0;
3886         int err = 0;
3887         int wret;
3888         int level;
3889         struct btrfs_path path;
3890         struct shared_node root_node;
3891         struct root_record *rec;
3892         struct btrfs_root_item *root_item = &root->root_item;
3893         struct cache_tree corrupt_blocks;
3894         struct orphan_data_extent *orphan;
3895         struct orphan_data_extent *tmp;
3896         enum btrfs_tree_block_status status;
3897         struct node_refs nrefs;
3898
3899         /*
3900          * Reuse the corrupt_block cache tree to record corrupted tree block
3901          *
3902          * Unlike the usage in extent tree check, here we do it in a per
3903          * fs/subvol tree base.
3904          */
3905         cache_tree_init(&corrupt_blocks);
3906         root->fs_info->corrupt_blocks = &corrupt_blocks;
3907
3908         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3909                 rec = get_root_rec(root_cache, root->root_key.objectid);
3910                 BUG_ON(IS_ERR(rec));
3911                 if (btrfs_root_refs(root_item) > 0)
3912                         rec->found_root_item = 1;
3913         }
3914
3915         btrfs_init_path(&path);
3916         memset(&root_node, 0, sizeof(root_node));
3917         cache_tree_init(&root_node.root_cache);
3918         cache_tree_init(&root_node.inode_cache);
3919         memset(&nrefs, 0, sizeof(nrefs));
3920
3921         /* Move the orphan extent record to corresponding inode_record */
3922         list_for_each_entry_safe(orphan, tmp,
3923                                  &root->orphan_data_extents, list) {
3924                 struct inode_record *inode;
3925
3926                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3927                                       1);
3928                 BUG_ON(IS_ERR(inode));
3929                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3930                 list_move(&orphan->list, &inode->orphan_extents);
3931         }
3932
3933         level = btrfs_header_level(root->node);
3934         memset(wc->nodes, 0, sizeof(wc->nodes));
3935         wc->nodes[level] = &root_node;
3936         wc->active_node = level;
3937         wc->root_level = level;
3938
3939         /* We may not have checked the root block, lets do that now */
3940         if (btrfs_is_leaf(root->node))
3941                 status = btrfs_check_leaf(root, NULL, root->node);
3942         else
3943                 status = btrfs_check_node(root, NULL, root->node);
3944         if (status != BTRFS_TREE_BLOCK_CLEAN)
3945                 return -EIO;
3946
3947         if (btrfs_root_refs(root_item) > 0 ||
3948             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3949                 path.nodes[level] = root->node;
3950                 extent_buffer_get(root->node);
3951                 path.slots[level] = 0;
3952         } else {
3953                 struct btrfs_key key;
3954                 struct btrfs_disk_key found_key;
3955
3956                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3957                 level = root_item->drop_level;
3958                 path.lowest_level = level;
3959                 if (level > btrfs_header_level(root->node) ||
3960                     level >= BTRFS_MAX_LEVEL) {
3961                         error("ignoring invalid drop level: %u", level);
3962                         goto skip_walking;
3963                 }
3964                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3965                 if (wret < 0)
3966                         goto skip_walking;
3967                 btrfs_node_key(path.nodes[level], &found_key,
3968                                 path.slots[level]);
3969                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3970                                         sizeof(found_key)));
3971         }
3972
3973         while (1) {
3974                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3975                 if (wret < 0)
3976                         ret = wret;
3977                 if (wret != 0)
3978                         break;
3979
3980                 wret = walk_up_tree(root, &path, wc, &level);
3981                 if (wret < 0)
3982                         ret = wret;
3983                 if (wret != 0)
3984                         break;
3985         }
3986 skip_walking:
3987         btrfs_release_path(&path);
3988
3989         if (!cache_tree_empty(&corrupt_blocks)) {
3990                 struct cache_extent *cache;
3991                 struct btrfs_corrupt_block *corrupt;
3992
3993                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3994                        root->root_key.objectid);
3995                 cache = first_cache_extent(&corrupt_blocks);
3996                 while (cache) {
3997                         corrupt = container_of(cache,
3998                                                struct btrfs_corrupt_block,
3999                                                cache);
4000                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4001                                cache->start, corrupt->level,
4002                                corrupt->key.objectid, corrupt->key.type,
4003                                corrupt->key.offset);
4004                         cache = next_cache_extent(cache);
4005                 }
4006                 if (repair) {
4007                         printf("Try to repair the btree for root %llu\n",
4008                                root->root_key.objectid);
4009                         ret = repair_btree(root, &corrupt_blocks);
4010                         if (ret < 0)
4011                                 fprintf(stderr, "Failed to repair btree: %s\n",
4012                                         strerror(-ret));
4013                         if (!ret)
4014                                 printf("Btree for root %llu is fixed\n",
4015                                        root->root_key.objectid);
4016                 }
4017         }
4018
4019         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4020         if (err < 0)
4021                 ret = err;
4022
4023         if (root_node.current) {
4024                 root_node.current->checked = 1;
4025                 maybe_free_inode_rec(&root_node.inode_cache,
4026                                 root_node.current);
4027         }
4028
4029         err = check_inode_recs(root, &root_node.inode_cache);
4030         if (!ret)
4031                 ret = err;
4032
4033         free_corrupt_blocks_tree(&corrupt_blocks);
4034         root->fs_info->corrupt_blocks = NULL;
4035         free_orphan_data_extents(&root->orphan_data_extents);
4036         return ret;
4037 }
4038
4039 static int fs_root_objectid(u64 objectid)
4040 {
4041         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4042             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4043                 return 1;
4044         return is_fstree(objectid);
4045 }
4046
4047 static int check_fs_roots(struct btrfs_root *root,
4048                           struct cache_tree *root_cache)
4049 {
4050         struct btrfs_path path;
4051         struct btrfs_key key;
4052         struct walk_control wc;
4053         struct extent_buffer *leaf, *tree_node;
4054         struct btrfs_root *tmp_root;
4055         struct btrfs_root *tree_root = root->fs_info->tree_root;
4056         int ret;
4057         int err = 0;
4058
4059         if (ctx.progress_enabled) {
4060                 ctx.tp = TASK_FS_ROOTS;
4061                 task_start(ctx.info);
4062         }
4063
4064         /*
4065          * Just in case we made any changes to the extent tree that weren't
4066          * reflected into the free space cache yet.
4067          */
4068         if (repair)
4069                 reset_cached_block_groups(root->fs_info);
4070         memset(&wc, 0, sizeof(wc));
4071         cache_tree_init(&wc.shared);
4072         btrfs_init_path(&path);
4073
4074 again:
4075         key.offset = 0;
4076         key.objectid = 0;
4077         key.type = BTRFS_ROOT_ITEM_KEY;
4078         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4079         if (ret < 0) {
4080                 err = 1;
4081                 goto out;
4082         }
4083         tree_node = tree_root->node;
4084         while (1) {
4085                 if (tree_node != tree_root->node) {
4086                         free_root_recs_tree(root_cache);
4087                         btrfs_release_path(&path);
4088                         goto again;
4089                 }
4090                 leaf = path.nodes[0];
4091                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4092                         ret = btrfs_next_leaf(tree_root, &path);
4093                         if (ret) {
4094                                 if (ret < 0)
4095                                         err = 1;
4096                                 break;
4097                         }
4098                         leaf = path.nodes[0];
4099                 }
4100                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4101                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4102                     fs_root_objectid(key.objectid)) {
4103                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4104                                 tmp_root = btrfs_read_fs_root_no_cache(
4105                                                 root->fs_info, &key);
4106                         } else {
4107                                 key.offset = (u64)-1;
4108                                 tmp_root = btrfs_read_fs_root(
4109                                                 root->fs_info, &key);
4110                         }
4111                         if (IS_ERR(tmp_root)) {
4112                                 err = 1;
4113                                 goto next;
4114                         }
4115                         ret = check_fs_root(tmp_root, root_cache, &wc);
4116                         if (ret == -EAGAIN) {
4117                                 free_root_recs_tree(root_cache);
4118                                 btrfs_release_path(&path);
4119                                 goto again;
4120                         }
4121                         if (ret)
4122                                 err = 1;
4123                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4124                                 btrfs_free_fs_root(tmp_root);
4125                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4126                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4127                         process_root_ref(leaf, path.slots[0], &key,
4128                                          root_cache);
4129                 }
4130 next:
4131                 path.slots[0]++;
4132         }
4133 out:
4134         btrfs_release_path(&path);
4135         if (err)
4136                 free_extent_cache_tree(&wc.shared);
4137         if (!cache_tree_empty(&wc.shared))
4138                 fprintf(stderr, "warning line %d\n", __LINE__);
4139
4140         task_stop(ctx.info);
4141
4142         return err;
4143 }
4144
4145 /*
4146  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4147  * INODE_REF/INODE_EXTREF match.
4148  *
4149  * @root:       the root of the fs/file tree
4150  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4151  * @key:        the key of the DIR_ITEM/DIR_INDEX
4152  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4153  *              distinguish root_dir between normal dir/file
4154  * @name:       the name in the INODE_REF/INODE_EXTREF
4155  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4156  * @mode:       the st_mode of INODE_ITEM
4157  *
4158  * Return 0 if no error occurred.
4159  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4160  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4161  * dir/file.
4162  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4163  * not match for normal dir/file.
4164  */
4165 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4166                          struct btrfs_key *key, u64 index, char *name,
4167                          u32 namelen, u32 mode)
4168 {
4169         struct btrfs_path path;
4170         struct extent_buffer *node;
4171         struct btrfs_dir_item *di;
4172         struct btrfs_key location;
4173         char namebuf[BTRFS_NAME_LEN] = {0};
4174         u32 total;
4175         u32 cur = 0;
4176         u32 len;
4177         u32 name_len;
4178         u32 data_len;
4179         u8 filetype;
4180         int slot;
4181         int ret;
4182
4183         btrfs_init_path(&path);
4184         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4185         if (ret < 0) {
4186                 ret = DIR_ITEM_MISSING;
4187                 goto out;
4188         }
4189
4190         /* Process root dir and goto out*/
4191         if (index == 0) {
4192                 if (ret == 0) {
4193                         ret = ROOT_DIR_ERROR;
4194                         error(
4195                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4196                                 root->objectid,
4197                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4198                                         "REF" : "EXTREF",
4199                                 ref_key->objectid, ref_key->offset,
4200                                 key->type == BTRFS_DIR_ITEM_KEY ?
4201                                         "DIR_ITEM" : "DIR_INDEX");
4202                 } else {
4203                         ret = 0;
4204                 }
4205
4206                 goto out;
4207         }
4208
4209         /* Process normal file/dir */
4210         if (ret > 0) {
4211                 ret = DIR_ITEM_MISSING;
4212                 error(
4213                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4214                         root->objectid,
4215                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4216                         ref_key->objectid, ref_key->offset,
4217                         key->type == BTRFS_DIR_ITEM_KEY ?
4218                                 "DIR_ITEM" : "DIR_INDEX",
4219                         key->objectid, key->offset, namelen, name,
4220                         imode_to_type(mode));
4221                 goto out;
4222         }
4223
4224         /* Check whether inode_id/filetype/name match */
4225         node = path.nodes[0];
4226         slot = path.slots[0];
4227         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4228         total = btrfs_item_size_nr(node, slot);
4229         while (cur < total) {
4230                 ret = DIR_ITEM_MISMATCH;
4231                 name_len = btrfs_dir_name_len(node, di);
4232                 data_len = btrfs_dir_data_len(node, di);
4233
4234                 btrfs_dir_item_key_to_cpu(node, di, &location);
4235                 if (location.objectid != ref_key->objectid ||
4236                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4237                     location.offset != 0)
4238                         goto next;
4239
4240                 filetype = btrfs_dir_type(node, di);
4241                 if (imode_to_type(mode) != filetype)
4242                         goto next;
4243
4244                 if (cur + sizeof(*di) + name_len > total ||
4245                     name_len > BTRFS_NAME_LEN) {
4246                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4247                                 root->objectid,
4248                                 key->type == BTRFS_DIR_ITEM_KEY ?
4249                                 "DIR_ITEM" : "DIR_INDEX",
4250                                 key->objectid, key->offset, name_len);
4251
4252                         if (cur + sizeof(*di) > total)
4253                                 break;
4254                         len = min_t(u32, total - cur - sizeof(*di),
4255                                     BTRFS_NAME_LEN);
4256                 } else {
4257                         len = name_len;
4258                 }
4259
4260                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4261                 if (len != namelen || strncmp(namebuf, name, len))
4262                         goto next;
4263
4264                 ret = 0;
4265                 goto out;
4266 next:
4267                 len = sizeof(*di) + name_len + data_len;
4268                 di = (struct btrfs_dir_item *)((char *)di + len);
4269                 cur += len;
4270         }
4271         if (ret == DIR_ITEM_MISMATCH)
4272                 error(
4273                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4274                         root->objectid,
4275                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4276                         ref_key->objectid, ref_key->offset,
4277                         key->type == BTRFS_DIR_ITEM_KEY ?
4278                                 "DIR_ITEM" : "DIR_INDEX",
4279                         key->objectid, key->offset, namelen, name,
4280                         imode_to_type(mode));
4281 out:
4282         btrfs_release_path(&path);
4283         return ret;
4284 }
4285
4286 /*
4287  * Traverse the given INODE_REF and call find_dir_item() to find related
4288  * DIR_ITEM/DIR_INDEX.
4289  *
4290  * @root:       the root of the fs/file tree
4291  * @ref_key:    the key of the INODE_REF
4292  * @refs:       the count of INODE_REF
4293  * @mode:       the st_mode of INODE_ITEM
4294  *
4295  * Return 0 if no error occurred.
4296  */
4297 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4298                            struct extent_buffer *node, int slot, u64 *refs,
4299                            int mode)
4300 {
4301         struct btrfs_key key;
4302         struct btrfs_inode_ref *ref;
4303         char namebuf[BTRFS_NAME_LEN] = {0};
4304         u32 total;
4305         u32 cur = 0;
4306         u32 len;
4307         u32 name_len;
4308         u64 index;
4309         int ret, err = 0;
4310
4311         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4312         total = btrfs_item_size_nr(node, slot);
4313
4314 next:
4315         /* Update inode ref count */
4316         (*refs)++;
4317
4318         index = btrfs_inode_ref_index(node, ref);
4319         name_len = btrfs_inode_ref_name_len(node, ref);
4320         if (cur + sizeof(*ref) + name_len > total ||
4321             name_len > BTRFS_NAME_LEN) {
4322                 warning("root %llu INODE_REF[%llu %llu] name too long",
4323                         root->objectid, ref_key->objectid, ref_key->offset);
4324
4325                 if (total < cur + sizeof(*ref))
4326                         goto out;
4327                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4328         } else {
4329                 len = name_len;
4330         }
4331
4332         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4333
4334         /* Check root dir ref name */
4335         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4336                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4337                       root->objectid, ref_key->objectid, ref_key->offset,
4338                       namebuf);
4339                 err |= ROOT_DIR_ERROR;
4340         }
4341
4342         /* Find related DIR_INDEX */
4343         key.objectid = ref_key->offset;
4344         key.type = BTRFS_DIR_INDEX_KEY;
4345         key.offset = index;
4346         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4347         err |= ret;
4348
4349         /* Find related dir_item */
4350         key.objectid = ref_key->offset;
4351         key.type = BTRFS_DIR_ITEM_KEY;
4352         key.offset = btrfs_name_hash(namebuf, len);
4353         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4354         err |= ret;
4355
4356         len = sizeof(*ref) + name_len;
4357         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4358         cur += len;
4359         if (cur < total)
4360                 goto next;
4361
4362 out:
4363         return err;
4364 }
4365
4366 /*
4367  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4368  * DIR_ITEM/DIR_INDEX.
4369  *
4370  * @root:       the root of the fs/file tree
4371  * @ref_key:    the key of the INODE_EXTREF
4372  * @refs:       the count of INODE_EXTREF
4373  * @mode:       the st_mode of INODE_ITEM
4374  *
4375  * Return 0 if no error occurred.
4376  */
4377 static int check_inode_extref(struct btrfs_root *root,
4378                               struct btrfs_key *ref_key,
4379                               struct extent_buffer *node, int slot, u64 *refs,
4380                               int mode)
4381 {
4382         struct btrfs_key key;
4383         struct btrfs_inode_extref *extref;
4384         char namebuf[BTRFS_NAME_LEN] = {0};
4385         u32 total;
4386         u32 cur = 0;
4387         u32 len;
4388         u32 name_len;
4389         u64 index;
4390         u64 parent;
4391         int ret;
4392         int err = 0;
4393
4394         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4395         total = btrfs_item_size_nr(node, slot);
4396
4397 next:
4398         /* update inode ref count */
4399         (*refs)++;
4400         name_len = btrfs_inode_extref_name_len(node, extref);
4401         index = btrfs_inode_extref_index(node, extref);
4402         parent = btrfs_inode_extref_parent(node, extref);
4403         if (name_len <= BTRFS_NAME_LEN) {
4404                 len = name_len;
4405         } else {
4406                 len = BTRFS_NAME_LEN;
4407                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4408                         root->objectid, ref_key->objectid, ref_key->offset);
4409         }
4410         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4411
4412         /* Check root dir ref name */
4413         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4414                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4415                       root->objectid, ref_key->objectid, ref_key->offset,
4416                       namebuf);
4417                 err |= ROOT_DIR_ERROR;
4418         }
4419
4420         /* find related dir_index */
4421         key.objectid = parent;
4422         key.type = BTRFS_DIR_INDEX_KEY;
4423         key.offset = index;
4424         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4425         err |= ret;
4426
4427         /* find related dir_item */
4428         key.objectid = parent;
4429         key.type = BTRFS_DIR_ITEM_KEY;
4430         key.offset = btrfs_name_hash(namebuf, len);
4431         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4432         err |= ret;
4433
4434         len = sizeof(*extref) + name_len;
4435         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4436         cur += len;
4437
4438         if (cur < total)
4439                 goto next;
4440
4441         return err;
4442 }
4443
4444 /*
4445  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4446  * DIR_ITEM/DIR_INDEX match.
4447  *
4448  * @root:       the root of the fs/file tree
4449  * @key:        the key of the INODE_REF/INODE_EXTREF
4450  * @name:       the name in the INODE_REF/INODE_EXTREF
4451  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4452  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4453  * to (u64)-1
4454  * @ext_ref:    the EXTENDED_IREF feature
4455  *
4456  * Return 0 if no error occurred.
4457  * Return >0 for error bitmap
4458  */
4459 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4460                           char *name, int namelen, u64 index,
4461                           unsigned int ext_ref)
4462 {
4463         struct btrfs_path path;
4464         struct btrfs_inode_ref *ref;
4465         struct btrfs_inode_extref *extref;
4466         struct extent_buffer *node;
4467         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4468         u32 total;
4469         u32 cur = 0;
4470         u32 len;
4471         u32 ref_namelen;
4472         u64 ref_index;
4473         u64 parent;
4474         u64 dir_id;
4475         int slot;
4476         int ret;
4477
4478         btrfs_init_path(&path);
4479         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4480         if (ret) {
4481                 ret = INODE_REF_MISSING;
4482                 goto extref;
4483         }
4484
4485         node = path.nodes[0];
4486         slot = path.slots[0];
4487
4488         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4489         total = btrfs_item_size_nr(node, slot);
4490
4491         /* Iterate all entry of INODE_REF */
4492         while (cur < total) {
4493                 ret = INODE_REF_MISSING;
4494
4495                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4496                 ref_index = btrfs_inode_ref_index(node, ref);
4497                 if (index != (u64)-1 && index != ref_index)
4498                         goto next_ref;
4499
4500                 if (cur + sizeof(*ref) + ref_namelen > total ||
4501                     ref_namelen > BTRFS_NAME_LEN) {
4502                         warning("root %llu INODE %s[%llu %llu] name too long",
4503                                 root->objectid,
4504                                 key->type == BTRFS_INODE_REF_KEY ?
4505                                         "REF" : "EXTREF",
4506                                 key->objectid, key->offset);
4507
4508                         if (cur + sizeof(*ref) > total)
4509                                 break;
4510                         len = min_t(u32, total - cur - sizeof(*ref),
4511                                     BTRFS_NAME_LEN);
4512                 } else {
4513                         len = ref_namelen;
4514                 }
4515
4516                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4517                                    len);
4518
4519                 if (len != namelen || strncmp(ref_namebuf, name, len))
4520                         goto next_ref;
4521
4522                 ret = 0;
4523                 goto out;
4524 next_ref:
4525                 len = sizeof(*ref) + ref_namelen;
4526                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4527                 cur += len;
4528         }
4529
4530 extref:
4531         /* Skip if not support EXTENDED_IREF feature */
4532         if (!ext_ref)
4533                 goto out;
4534
4535         btrfs_release_path(&path);
4536         btrfs_init_path(&path);
4537
4538         dir_id = key->offset;
4539         key->type = BTRFS_INODE_EXTREF_KEY;
4540         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4541
4542         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4543         if (ret) {
4544                 ret = INODE_REF_MISSING;
4545                 goto out;
4546         }
4547
4548         node = path.nodes[0];
4549         slot = path.slots[0];
4550
4551         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4552         cur = 0;
4553         total = btrfs_item_size_nr(node, slot);
4554
4555         /* Iterate all entry of INODE_EXTREF */
4556         while (cur < total) {
4557                 ret = INODE_REF_MISSING;
4558
4559                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4560                 ref_index = btrfs_inode_extref_index(node, extref);
4561                 parent = btrfs_inode_extref_parent(node, extref);
4562                 if (index != (u64)-1 && index != ref_index)
4563                         goto next_extref;
4564
4565                 if (parent != dir_id)
4566                         goto next_extref;
4567
4568                 if (ref_namelen <= BTRFS_NAME_LEN) {
4569                         len = ref_namelen;
4570                 } else {
4571                         len = BTRFS_NAME_LEN;
4572                         warning("root %llu INODE %s[%llu %llu] name too long",
4573                                 root->objectid,
4574                                 key->type == BTRFS_INODE_REF_KEY ?
4575                                         "REF" : "EXTREF",
4576                                 key->objectid, key->offset);
4577                 }
4578                 read_extent_buffer(node, ref_namebuf,
4579                                    (unsigned long)(extref + 1), len);
4580
4581                 if (len != namelen || strncmp(ref_namebuf, name, len))
4582                         goto next_extref;
4583
4584                 ret = 0;
4585                 goto out;
4586
4587 next_extref:
4588                 len = sizeof(*extref) + ref_namelen;
4589                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4590                 cur += len;
4591
4592         }
4593 out:
4594         btrfs_release_path(&path);
4595         return ret;
4596 }
4597
4598 /*
4599  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4600  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4601  *
4602  * @root:       the root of the fs/file tree
 * @key:        the key of the DIR_ITEM/DIR_INDEX
4604  * @size:       the st_size of the INODE_ITEM
4605  * @ext_ref:    the EXTENDED_IREF feature
4606  *
4607  * Return 0 if no error occurred.
4608  */
4609 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4610                           struct extent_buffer *node, int slot, u64 *size,
4611                           unsigned int ext_ref)
4612 {
4613         struct btrfs_dir_item *di;
4614         struct btrfs_inode_item *ii;
4615         struct btrfs_path path;
4616         struct btrfs_key location;
4617         char namebuf[BTRFS_NAME_LEN] = {0};
4618         u32 total;
4619         u32 cur = 0;
4620         u32 len;
4621         u32 name_len;
4622         u32 data_len;
4623         u8 filetype;
4624         u32 mode;
4625         u64 index;
4626         int ret;
4627         int err = 0;
4628
4629         /*
4630          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4631          * ignore index check.
4632          */
4633         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4634
4635         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4636         total = btrfs_item_size_nr(node, slot);
4637
4638         while (cur < total) {
4639                 data_len = btrfs_dir_data_len(node, di);
4640                 if (data_len)
4641                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4642                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4643                               "DIR_ITEM" : "DIR_INDEX",
4644                               key->objectid, key->offset, data_len);
4645
4646                 name_len = btrfs_dir_name_len(node, di);
4647                 if (cur + sizeof(*di) + name_len > total ||
4648                     name_len > BTRFS_NAME_LEN) {
4649                         warning("root %llu %s[%llu %llu] name too long",
4650                                 root->objectid,
4651                                 key->type == BTRFS_DIR_ITEM_KEY ?
4652                                 "DIR_ITEM" : "DIR_INDEX",
4653                                 key->objectid, key->offset);
4654
4655                         if (cur + sizeof(*di) > total)
4656                                 break;
4657                         len = min_t(u32, total - cur - sizeof(*di),
4658                                     BTRFS_NAME_LEN);
4659                 } else {
4660                         len = name_len;
4661                 }
4662                 (*size) += name_len;
4663
4664                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4665                 filetype = btrfs_dir_type(node, di);
4666
4667                 btrfs_init_path(&path);
4668                 btrfs_dir_item_key_to_cpu(node, di, &location);
4669
4670                 /* Ignore related ROOT_ITEM check */
4671                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4672                         goto next;
4673
4674                 /* Check relative INODE_ITEM(existence/filetype) */
4675                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4676                 if (ret) {
4677                         err |= INODE_ITEM_MISSING;
4678                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4679                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4680                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4681                               key->offset, location.objectid, name_len,
4682                               namebuf, filetype);
4683                         goto next;
4684                 }
4685
4686                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4687                                     struct btrfs_inode_item);
4688                 mode = btrfs_inode_mode(path.nodes[0], ii);
4689
4690                 if (imode_to_type(mode) != filetype) {
4691                         err |= INODE_ITEM_MISMATCH;
4692                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4693                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4694                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4695                               key->offset, name_len, namebuf, filetype);
4696                 }
4697
4698                 /* Check relative INODE_REF/INODE_EXTREF */
4699                 location.type = BTRFS_INODE_REF_KEY;
4700                 location.offset = key->objectid;
4701                 ret = find_inode_ref(root, &location, namebuf, len,
4702                                        index, ext_ref);
4703                 err |= ret;
4704                 if (ret & INODE_REF_MISSING)
4705                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4706                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4707                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4708                               key->offset, name_len, namebuf, filetype);
4709
4710 next:
4711                 btrfs_release_path(&path);
4712                 len = sizeof(*di) + name_len + data_len;
4713                 di = (struct btrfs_dir_item *)((char *)di + len);
4714                 cur += len;
4715
4716                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4717                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4718                               root->objectid, key->objectid, key->offset);
4719                         break;
4720                 }
4721         }
4722
4723         return err;
4724 }
4725
4726 /*
4727  * Check file extent datasum/hole, update the size of the file extents,
4728  * check and update the last offset of the file extent.
4729  *
4730  * @root:       the root of fs/file tree.
4731  * @fkey:       the key of the file extent.
4732  * @nodatasum:  INODE_NODATASUM feature.
4733  * @size:       the sum of all EXTENT_DATA items size for this inode.
4734  * @end:        the offset of the last extent.
4735  *
4736  * Return 0 if no error occurred.
4737  */
4738 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4739                              struct extent_buffer *node, int slot,
4740                              unsigned int nodatasum, u64 *size, u64 *end)
4741 {
4742         struct btrfs_file_extent_item *fi;
4743         u64 disk_bytenr;
4744         u64 disk_num_bytes;
4745         u64 extent_num_bytes;
4746         u64 extent_offset;
4747         u64 csum_found;         /* In byte size, sectorsize aligned */
4748         u64 search_start;       /* Logical range start we search for csum */
4749         u64 search_len;         /* Logical range len we search for csum */
4750         unsigned int extent_type;
4751         unsigned int is_hole;
4752         int compressed = 0;
4753         int ret;
4754         int err = 0;
4755
4756         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4757
4758         /* Check inline extent */
4759         extent_type = btrfs_file_extent_type(node, fi);
4760         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4761                 struct btrfs_item *e = btrfs_item_nr(slot);
4762                 u32 item_inline_len;
4763
4764                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4765                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4766                 compressed = btrfs_file_extent_compression(node, fi);
4767                 if (extent_num_bytes == 0) {
4768                         error(
4769                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4770                                 root->objectid, fkey->objectid, fkey->offset);
4771                         err |= FILE_EXTENT_ERROR;
4772                 }
4773                 if (!compressed && extent_num_bytes != item_inline_len) {
4774                         error(
4775                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4776                                 root->objectid, fkey->objectid, fkey->offset,
4777                                 extent_num_bytes, item_inline_len);
4778                         err |= FILE_EXTENT_ERROR;
4779                 }
4780                 *size += extent_num_bytes;
4781                 return err;
4782         }
4783
4784         /* Check extent type */
4785         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4786                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4787                 err |= FILE_EXTENT_ERROR;
4788                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4789                       root->objectid, fkey->objectid, fkey->offset);
4790                 return err;
4791         }
4792
4793         /* Check REG_EXTENT/PREALLOC_EXTENT */
4794         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4795         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4796         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4797         extent_offset = btrfs_file_extent_offset(node, fi);
4798         compressed = btrfs_file_extent_compression(node, fi);
4799         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4800
4801         /*
4802          * Check EXTENT_DATA csum
4803          *
4804          * For plain (uncompressed) extent, we should only check the range
4805          * we're referring to, as it's possible that part of prealloc extent
4806          * has been written, and has csum:
4807          *
4808          * |<--- Original large preallocated extent A ---->|
4809          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4810          *      No csum                         Has csum
4811          *
4812          * For compressed extent, we should check the whole range.
4813          */
4814         if (!compressed) {
4815                 search_start = disk_bytenr + extent_offset;
4816                 search_len = extent_num_bytes;
4817         } else {
4818                 search_start = disk_bytenr;
4819                 search_len = disk_num_bytes;
4820         }
4821         ret = count_csum_range(root, search_start, search_len, &csum_found);
4822         if (csum_found > 0 && nodatasum) {
4823                 err |= ODD_CSUM_ITEM;
4824                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4825                       root->objectid, fkey->objectid, fkey->offset);
4826         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4827                    !is_hole && (ret < 0 || csum_found < search_len)) {
4828                 err |= CSUM_ITEM_MISSING;
4829                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4830                       root->objectid, fkey->objectid, fkey->offset,
4831                       csum_found, search_len);
4832         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4833                 err |= ODD_CSUM_ITEM;
4834                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4835                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4836         }
4837
4838         /* Check EXTENT_DATA hole */
4839         if (no_holes && is_hole) {
4840                 err |= FILE_EXTENT_ERROR;
4841                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4842                       root->objectid, fkey->objectid, fkey->offset);
4843         } else if (!no_holes && *end != fkey->offset) {
4844                 err |= FILE_EXTENT_ERROR;
4845                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4846                       root->objectid, fkey->objectid, fkey->offset);
4847         }
4848
4849         *end += extent_num_bytes;
4850         if (!is_hole)
4851                 *size += extent_num_bytes;
4852
4853         return err;
4854 }
4855
4856 /*
4857  * Check INODE_ITEM and related ITEMs (the same inode number)
4858  * 1. check link count
4859  * 2. check inode ref/extref
4860  * 3. check dir item/index
4861  *
4862  * @ext_ref:    the EXTENDED_IREF feature
4863  *
4864  * Return 0 if no error occurred.
4865  * Return >0 for error or hit the traversal is done(by error bitmap)
4866  */
4867 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4868                             unsigned int ext_ref)
4869 {
4870         struct extent_buffer *node;
4871         struct btrfs_inode_item *ii;
4872         struct btrfs_key key;
4873         u64 inode_id;
4874         u32 mode;
4875         u64 nlink;
4876         u64 nbytes;
4877         u64 isize;
4878         u64 size = 0;
4879         u64 refs = 0;
4880         u64 extent_end = 0;
4881         u64 extent_size = 0;
4882         unsigned int dir;
4883         unsigned int nodatasum;
4884         int slot;
4885         int ret;
4886         int err = 0;
4887
4888         node = path->nodes[0];
4889         slot = path->slots[0];
4890
4891         btrfs_item_key_to_cpu(node, &key, slot);
4892         inode_id = key.objectid;
4893
4894         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4895                 ret = btrfs_next_item(root, path);
4896                 if (ret > 0)
4897                         err |= LAST_ITEM;
4898                 return err;
4899         }
4900
4901         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4902         isize = btrfs_inode_size(node, ii);
4903         nbytes = btrfs_inode_nbytes(node, ii);
4904         mode = btrfs_inode_mode(node, ii);
4905         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4906         nlink = btrfs_inode_nlink(node, ii);
4907         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4908
4909         while (1) {
4910                 ret = btrfs_next_item(root, path);
4911                 if (ret < 0) {
4912                         /* out will fill 'err' rusing current statistics */
4913                         goto out;
4914                 } else if (ret > 0) {
4915                         err |= LAST_ITEM;
4916                         goto out;
4917                 }
4918
4919                 node = path->nodes[0];
4920                 slot = path->slots[0];
4921                 btrfs_item_key_to_cpu(node, &key, slot);
4922                 if (key.objectid != inode_id)
4923                         goto out;
4924
4925                 switch (key.type) {
4926                 case BTRFS_INODE_REF_KEY:
4927                         ret = check_inode_ref(root, &key, node, slot, &refs,
4928                                               mode);
4929                         err |= ret;
4930                         break;
4931                 case BTRFS_INODE_EXTREF_KEY:
4932                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4933                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4934                                         root->objectid, key.objectid,
4935                                         key.offset);
4936                         ret = check_inode_extref(root, &key, node, slot, &refs,
4937                                                  mode);
4938                         err |= ret;
4939                         break;
4940                 case BTRFS_DIR_ITEM_KEY:
4941                 case BTRFS_DIR_INDEX_KEY:
4942                         if (!dir) {
4943                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4944                                         root->objectid, inode_id,
4945                                         imode_to_type(mode), key.objectid,
4946                                         key.offset);
4947                         }
4948                         ret = check_dir_item(root, &key, node, slot, &size,
4949                                              ext_ref);
4950                         err |= ret;
4951                         break;
4952                 case BTRFS_EXTENT_DATA_KEY:
4953                         if (dir) {
4954                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4955                                         root->objectid, inode_id, key.objectid,
4956                                         key.offset);
4957                         }
4958                         ret = check_file_extent(root, &key, node, slot,
4959                                                 nodatasum, &extent_size,
4960                                                 &extent_end);
4961                         err |= ret;
4962                         break;
4963                 case BTRFS_XATTR_ITEM_KEY:
4964                         break;
4965                 default:
4966                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4967                               key.objectid, key.type, key.offset);
4968                 }
4969         }
4970
4971 out:
4972         /* verify INODE_ITEM nlink/isize/nbytes */
4973         if (dir) {
4974                 if (nlink != 1) {
4975                         err |= LINK_COUNT_ERROR;
4976                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4977                               root->objectid, inode_id, nlink);
4978                 }
4979
4980                 /*
4981                  * Just a warning, as dir inode nbytes is just an
4982                  * instructive value.
4983                  */
4984                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4985                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4986                                 root->objectid, inode_id, root->nodesize);
4987                 }
4988
4989                 if (isize != size) {
4990                         err |= ISIZE_ERROR;
4991                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4992                               root->objectid, inode_id, isize, size);
4993                 }
4994         } else {
4995                 if (nlink != refs) {
4996                         err |= LINK_COUNT_ERROR;
4997                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4998                               root->objectid, inode_id, nlink, refs);
4999                 } else if (!nlink) {
5000                         err |= ORPHAN_ITEM;
5001                 }
5002
5003                 if (!nbytes && !no_holes && extent_end < isize) {
5004                         err |= NBYTES_ERROR;
5005                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5006                               root->objectid, inode_id, isize);
5007                 }
5008
5009                 if (nbytes != extent_size) {
5010                         err |= NBYTES_ERROR;
5011                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5012                               root->objectid, inode_id, nbytes, extent_size);
5013                 }
5014         }
5015
5016         return err;
5017 }
5018
5019 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5020 {
5021         struct btrfs_path path;
5022         struct btrfs_key key;
5023         int err = 0;
5024         int ret;
5025
5026         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5027         key.type = BTRFS_INODE_ITEM_KEY;
5028         key.offset = 0;
5029
5030         /* For root being dropped, we don't need to check first inode */
5031         if (btrfs_root_refs(&root->root_item) == 0 &&
5032             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5033             key.objectid)
5034                 return 0;
5035
5036         btrfs_init_path(&path);
5037
5038         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5039         if (ret < 0)
5040                 goto out;
5041         if (ret > 0) {
5042                 ret = 0;
5043                 err |= INODE_ITEM_MISSING;
5044                 error("first inode item of root %llu is missing",
5045                       root->objectid);
5046         }
5047
5048         err |= check_inode_item(root, &path, ext_ref);
5049         err &= ~LAST_ITEM;
5050         if (err && !ret)
5051                 ret = -EIO;
5052 out:
5053         btrfs_release_path(&path);
5054         return ret;
5055 }
5056
5057 /*
5058  * Iterate all item on the tree and call check_inode_item() to check.
5059  *
5060  * @root:       the root of the tree to be checked.
5061  * @ext_ref:    the EXTENDED_IREF feature
5062  *
5063  * Return 0 if no error found.
5064  * Return <0 for error.
5065  */
5066 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5067 {
5068         struct btrfs_path path;
5069         struct node_refs nrefs;
5070         struct btrfs_root_item *root_item = &root->root_item;
5071         int ret;
5072         int level;
5073         int err = 0;
5074
5075         /*
5076          * We need to manually check the first inode item(256)
5077          * As the following traversal function will only start from
5078          * the first inode item in the leaf, if inode item(256) is missing
5079          * we will just skip it forever.
5080          */
5081         ret = check_fs_first_inode(root, ext_ref);
5082         if (ret < 0)
5083                 return ret;
5084
5085         memset(&nrefs, 0, sizeof(nrefs));
5086         level = btrfs_header_level(root->node);
5087         btrfs_init_path(&path);
5088
5089         if (btrfs_root_refs(root_item) > 0 ||
5090             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5091                 path.nodes[level] = root->node;
5092                 path.slots[level] = 0;
5093                 extent_buffer_get(root->node);
5094         } else {
5095                 struct btrfs_key key;
5096
5097                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5098                 level = root_item->drop_level;
5099                 path.lowest_level = level;
5100                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5101                 if (ret < 0)
5102                         goto out;
5103                 ret = 0;
5104         }
5105
5106         while (1) {
5107                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5108                 err |= !!ret;
5109
5110                 /* if ret is negative, walk shall stop */
5111                 if (ret < 0) {
5112                         ret = err;
5113                         break;
5114                 }
5115
5116                 ret = walk_up_tree_v2(root, &path, &level);
5117                 if (ret != 0) {
5118                         /* Normal exit, reset ret to err */
5119                         ret = err;
5120                         break;
5121                 }
5122         }
5123
5124 out:
5125         btrfs_release_path(&path);
5126         return ret;
5127 }
5128
5129 /*
5130  * Find the relative ref for root_ref and root_backref.
5131  *
5132  * @root:       the root of the root tree.
5133  * @ref_key:    the key of the root ref.
5134  *
5135  * Return 0 if no error occurred.
5136  */
5137 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5138                           struct extent_buffer *node, int slot)
5139 {
5140         struct btrfs_path path;
5141         struct btrfs_key key;
5142         struct btrfs_root_ref *ref;
5143         struct btrfs_root_ref *backref;
5144         char ref_name[BTRFS_NAME_LEN] = {0};
5145         char backref_name[BTRFS_NAME_LEN] = {0};
5146         u64 ref_dirid;
5147         u64 ref_seq;
5148         u32 ref_namelen;
5149         u64 backref_dirid;
5150         u64 backref_seq;
5151         u32 backref_namelen;
5152         u32 len;
5153         int ret;
5154         int err = 0;
5155
5156         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5157         ref_dirid = btrfs_root_ref_dirid(node, ref);
5158         ref_seq = btrfs_root_ref_sequence(node, ref);
5159         ref_namelen = btrfs_root_ref_name_len(node, ref);
5160
5161         if (ref_namelen <= BTRFS_NAME_LEN) {
5162                 len = ref_namelen;
5163         } else {
5164                 len = BTRFS_NAME_LEN;
5165                 warning("%s[%llu %llu] ref_name too long",
5166                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5167                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5168                         ref_key->offset);
5169         }
5170         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5171
5172         /* Find relative root_ref */
5173         key.objectid = ref_key->offset;
5174         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5175         key.offset = ref_key->objectid;
5176
5177         btrfs_init_path(&path);
5178         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5179         if (ret) {
5180                 err |= ROOT_REF_MISSING;
5181                 error("%s[%llu %llu] couldn't find relative ref",
5182                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5183                       "ROOT_REF" : "ROOT_BACKREF",
5184                       ref_key->objectid, ref_key->offset);
5185                 goto out;
5186         }
5187
5188         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5189                                  struct btrfs_root_ref);
5190         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5191         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5192         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5193
5194         if (backref_namelen <= BTRFS_NAME_LEN) {
5195                 len = backref_namelen;
5196         } else {
5197                 len = BTRFS_NAME_LEN;
5198                 warning("%s[%llu %llu] ref_name too long",
5199                         key.type == BTRFS_ROOT_REF_KEY ?
5200                         "ROOT_REF" : "ROOT_BACKREF",
5201                         key.objectid, key.offset);
5202         }
5203         read_extent_buffer(path.nodes[0], backref_name,
5204                            (unsigned long)(backref + 1), len);
5205
5206         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5207             ref_namelen != backref_namelen ||
5208             strncmp(ref_name, backref_name, len)) {
5209                 err |= ROOT_REF_MISMATCH;
5210                 error("%s[%llu %llu] mismatch relative ref",
5211                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5212                       "ROOT_REF" : "ROOT_BACKREF",
5213                       ref_key->objectid, ref_key->offset);
5214         }
5215 out:
5216         btrfs_release_path(&path);
5217         return err;
5218 }
5219
5220 /*
5221  * Check all fs/file tree in low_memory mode.
5222  *
5223  * 1. for fs tree root item, call check_fs_root_v2()
5224  * 2. for fs tree root ref/backref, call check_root_ref()
5225  *
5226  * Return 0 if no error occurred.
5227  */
5228 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5229 {
5230         struct btrfs_root *tree_root = fs_info->tree_root;
5231         struct btrfs_root *cur_root = NULL;
5232         struct btrfs_path path;
5233         struct btrfs_key key;
5234         struct extent_buffer *node;
5235         unsigned int ext_ref;
5236         int slot;
5237         int ret;
5238         int err = 0;
5239
5240         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5241
5242         btrfs_init_path(&path);
5243         key.objectid = BTRFS_FS_TREE_OBJECTID;
5244         key.offset = 0;
5245         key.type = BTRFS_ROOT_ITEM_KEY;
5246
5247         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5248         if (ret < 0) {
5249                 err = ret;
5250                 goto out;
5251         } else if (ret > 0) {
5252                 err = -ENOENT;
5253                 goto out;
5254         }
5255
5256         while (1) {
5257                 node = path.nodes[0];
5258                 slot = path.slots[0];
5259                 btrfs_item_key_to_cpu(node, &key, slot);
5260                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5261                         goto out;
5262                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5263                     fs_root_objectid(key.objectid)) {
5264                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5265                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5266                                                                        &key);
5267                         } else {
5268                                 key.offset = (u64)-1;
5269                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5270                         }
5271
5272                         if (IS_ERR(cur_root)) {
5273                                 error("Fail to read fs/subvol tree: %lld",
5274                                       key.objectid);
5275                                 err = -EIO;
5276                                 goto next;
5277                         }
5278
5279                         ret = check_fs_root_v2(cur_root, ext_ref);
5280                         err |= ret;
5281
5282                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5283                                 btrfs_free_fs_root(cur_root);
5284                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5285                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5286                         ret = check_root_ref(tree_root, &key, node, slot);
5287                         err |= ret;
5288                 }
5289 next:
5290                 ret = btrfs_next_item(tree_root, &path);
5291                 if (ret > 0)
5292                         goto out;
5293                 if (ret < 0) {
5294                         err = ret;
5295                         goto out;
5296                 }
5297         }
5298
5299 out:
5300         btrfs_release_path(&path);
5301         return err;
5302 }
5303
5304 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5305 {
5306         struct list_head *cur = rec->backrefs.next;
5307         struct extent_backref *back;
5308         struct tree_backref *tback;
5309         struct data_backref *dback;
5310         u64 found = 0;
5311         int err = 0;
5312
5313         while(cur != &rec->backrefs) {
5314                 back = to_extent_backref(cur);
5315                 cur = cur->next;
5316                 if (!back->found_extent_tree) {
5317                         err = 1;
5318                         if (!print_errs)
5319                                 goto out;
5320                         if (back->is_data) {
5321                                 dback = to_data_backref(back);
5322                                 fprintf(stderr, "Backref %llu %s %llu"
5323                                         " owner %llu offset %llu num_refs %lu"
5324                                         " not found in extent tree\n",
5325                                         (unsigned long long)rec->start,
5326                                         back->full_backref ?
5327                                         "parent" : "root",
5328                                         back->full_backref ?
5329                                         (unsigned long long)dback->parent:
5330                                         (unsigned long long)dback->root,
5331                                         (unsigned long long)dback->owner,
5332                                         (unsigned long long)dback->offset,
5333                                         (unsigned long)dback->num_refs);
5334                         } else {
5335                                 tback = to_tree_backref(back);
5336                                 fprintf(stderr, "Backref %llu parent %llu"
5337                                         " root %llu not found in extent tree\n",
5338                                         (unsigned long long)rec->start,
5339                                         (unsigned long long)tback->parent,
5340                                         (unsigned long long)tback->root);
5341                         }
5342                 }
5343                 if (!back->is_data && !back->found_ref) {
5344                         err = 1;
5345                         if (!print_errs)
5346                                 goto out;
5347                         tback = to_tree_backref(back);
5348                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5349                                 (unsigned long long)rec->start,
5350                                 back->full_backref ? "parent" : "root",
5351                                 back->full_backref ?
5352                                 (unsigned long long)tback->parent :
5353                                 (unsigned long long)tback->root, back);
5354                 }
5355                 if (back->is_data) {
5356                         dback = to_data_backref(back);
5357                         if (dback->found_ref != dback->num_refs) {
5358                                 err = 1;
5359                                 if (!print_errs)
5360                                         goto out;
5361                                 fprintf(stderr, "Incorrect local backref count"
5362                                         " on %llu %s %llu owner %llu"
5363                                         " offset %llu found %u wanted %u back %p\n",
5364                                         (unsigned long long)rec->start,
5365                                         back->full_backref ?
5366                                         "parent" : "root",
5367                                         back->full_backref ?
5368                                         (unsigned long long)dback->parent:
5369                                         (unsigned long long)dback->root,
5370                                         (unsigned long long)dback->owner,
5371                                         (unsigned long long)dback->offset,
5372                                         dback->found_ref, dback->num_refs, back);
5373                         }
5374                         if (dback->disk_bytenr != rec->start) {
5375                                 err = 1;
5376                                 if (!print_errs)
5377                                         goto out;
5378                                 fprintf(stderr, "Backref disk bytenr does not"
5379                                         " match extent record, bytenr=%llu, "
5380                                         "ref bytenr=%llu\n",
5381                                         (unsigned long long)rec->start,
5382                                         (unsigned long long)dback->disk_bytenr);
5383                         }
5384
5385                         if (dback->bytes != rec->nr) {
5386                                 err = 1;
5387                                 if (!print_errs)
5388                                         goto out;
5389                                 fprintf(stderr, "Backref bytes do not match "
5390                                         "extent backref, bytenr=%llu, ref "
5391                                         "bytes=%llu, backref bytes=%llu\n",
5392                                         (unsigned long long)rec->start,
5393                                         (unsigned long long)rec->nr,
5394                                         (unsigned long long)dback->bytes);
5395                         }
5396                 }
5397                 if (!back->is_data) {
5398                         found += 1;
5399                 } else {
5400                         dback = to_data_backref(back);
5401                         found += dback->found_ref;
5402                 }
5403         }
5404         if (found != rec->refs) {
5405                 err = 1;
5406                 if (!print_errs)
5407                         goto out;
5408                 fprintf(stderr, "Incorrect global backref count "
5409                         "on %llu found %llu wanted %llu\n",
5410                         (unsigned long long)rec->start,
5411                         (unsigned long long)found,
5412                         (unsigned long long)rec->refs);
5413         }
5414 out:
5415         return err;
5416 }
5417
5418 static int free_all_extent_backrefs(struct extent_record *rec)
5419 {
5420         struct extent_backref *back;
5421         struct list_head *cur;
5422         while (!list_empty(&rec->backrefs)) {
5423                 cur = rec->backrefs.next;
5424                 back = to_extent_backref(cur);
5425                 list_del(cur);
5426                 free(back);
5427         }
5428         return 0;
5429 }
5430
5431 static void free_extent_record_cache(struct cache_tree *extent_cache)
5432 {
5433         struct cache_extent *cache;
5434         struct extent_record *rec;
5435
5436         while (1) {
5437                 cache = first_cache_extent(extent_cache);
5438                 if (!cache)
5439                         break;
5440                 rec = container_of(cache, struct extent_record, cache);
5441                 remove_cache_extent(extent_cache, cache);
5442                 free_all_extent_backrefs(rec);
5443                 free(rec);
5444         }
5445 }
5446
5447 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5448                                  struct extent_record *rec)
5449 {
5450         if (rec->content_checked && rec->owner_ref_checked &&
5451             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5452             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5453             !rec->bad_full_backref && !rec->crossing_stripes &&
5454             !rec->wrong_chunk_type) {
5455                 remove_cache_extent(extent_cache, &rec->cache);
5456                 free_all_extent_backrefs(rec);
5457                 list_del_init(&rec->list);
5458                 free(rec);
5459         }
5460         return 0;
5461 }
5462
5463 static int check_owner_ref(struct btrfs_root *root,
5464                             struct extent_record *rec,
5465                             struct extent_buffer *buf)
5466 {
5467         struct extent_backref *node;
5468         struct tree_backref *back;
5469         struct btrfs_root *ref_root;
5470         struct btrfs_key key;
5471         struct btrfs_path path;
5472         struct extent_buffer *parent;
5473         int level;
5474         int found = 0;
5475         int ret;
5476
5477         list_for_each_entry(node, &rec->backrefs, list) {
5478                 if (node->is_data)
5479                         continue;
5480                 if (!node->found_ref)
5481                         continue;
5482                 if (node->full_backref)
5483                         continue;
5484                 back = to_tree_backref(node);
5485                 if (btrfs_header_owner(buf) == back->root)
5486                         return 0;
5487         }
5488         BUG_ON(rec->is_root);
5489
5490         /* try to find the block by search corresponding fs tree */
5491         key.objectid = btrfs_header_owner(buf);
5492         key.type = BTRFS_ROOT_ITEM_KEY;
5493         key.offset = (u64)-1;
5494
5495         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5496         if (IS_ERR(ref_root))
5497                 return 1;
5498
5499         level = btrfs_header_level(buf);
5500         if (level == 0)
5501                 btrfs_item_key_to_cpu(buf, &key, 0);
5502         else
5503                 btrfs_node_key_to_cpu(buf, &key, 0);
5504
5505         btrfs_init_path(&path);
5506         path.lowest_level = level + 1;
5507         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5508         if (ret < 0)
5509                 return 0;
5510
5511         parent = path.nodes[level + 1];
5512         if (parent && buf->start == btrfs_node_blockptr(parent,
5513                                                         path.slots[level + 1]))
5514                 found = 1;
5515
5516         btrfs_release_path(&path);
5517         return found ? 0 : 1;
5518 }
5519
5520 static int is_extent_tree_record(struct extent_record *rec)
5521 {
5522         struct list_head *cur = rec->backrefs.next;
5523         struct extent_backref *node;
5524         struct tree_backref *back;
5525         int is_extent = 0;
5526
5527         while(cur != &rec->backrefs) {
5528                 node = to_extent_backref(cur);
5529                 cur = cur->next;
5530                 if (node->is_data)
5531                         return 0;
5532                 back = to_tree_backref(node);
5533                 if (node->full_backref)
5534                         return 0;
5535                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5536                         is_extent = 1;
5537         }
5538         return is_extent;
5539 }
5540
5541
5542 static int record_bad_block_io(struct btrfs_fs_info *info,
5543                                struct cache_tree *extent_cache,
5544                                u64 start, u64 len)
5545 {
5546         struct extent_record *rec;
5547         struct cache_extent *cache;
5548         struct btrfs_key key;
5549
5550         cache = lookup_cache_extent(extent_cache, start, len);
5551         if (!cache)
5552                 return 0;
5553
5554         rec = container_of(cache, struct extent_record, cache);
5555         if (!is_extent_tree_record(rec))
5556                 return 0;
5557
5558         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5559         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5560 }
5561
5562 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5563                        struct extent_buffer *buf, int slot)
5564 {
5565         if (btrfs_header_level(buf)) {
5566                 struct btrfs_key_ptr ptr1, ptr2;
5567
5568                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5569                                    sizeof(struct btrfs_key_ptr));
5570                 read_extent_buffer(buf, &ptr2,
5571                                    btrfs_node_key_ptr_offset(slot + 1),
5572                                    sizeof(struct btrfs_key_ptr));
5573                 write_extent_buffer(buf, &ptr1,
5574                                     btrfs_node_key_ptr_offset(slot + 1),
5575                                     sizeof(struct btrfs_key_ptr));
5576                 write_extent_buffer(buf, &ptr2,
5577                                     btrfs_node_key_ptr_offset(slot),
5578                                     sizeof(struct btrfs_key_ptr));
5579                 if (slot == 0) {
5580                         struct btrfs_disk_key key;
5581                         btrfs_node_key(buf, &key, 0);
5582                         btrfs_fixup_low_keys(root, path, &key,
5583                                              btrfs_header_level(buf) + 1);
5584                 }
5585         } else {
5586                 struct btrfs_item *item1, *item2;
5587                 struct btrfs_key k1, k2;
5588                 char *item1_data, *item2_data;
5589                 u32 item1_offset, item2_offset, item1_size, item2_size;
5590
5591                 item1 = btrfs_item_nr(slot);
5592                 item2 = btrfs_item_nr(slot + 1);
5593                 btrfs_item_key_to_cpu(buf, &k1, slot);
5594                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5595                 item1_offset = btrfs_item_offset(buf, item1);
5596                 item2_offset = btrfs_item_offset(buf, item2);
5597                 item1_size = btrfs_item_size(buf, item1);
5598                 item2_size = btrfs_item_size(buf, item2);
5599
5600                 item1_data = malloc(item1_size);
5601                 if (!item1_data)
5602                         return -ENOMEM;
5603                 item2_data = malloc(item2_size);
5604                 if (!item2_data) {
5605                         free(item1_data);
5606                         return -ENOMEM;
5607                 }
5608
5609                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5610                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5611
5612                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5613                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5614                 free(item1_data);
5615                 free(item2_data);
5616
5617                 btrfs_set_item_offset(buf, item1, item2_offset);
5618                 btrfs_set_item_offset(buf, item2, item1_offset);
5619                 btrfs_set_item_size(buf, item1, item2_size);
5620                 btrfs_set_item_size(buf, item2, item1_size);
5621
5622                 path->slots[0] = slot;
5623                 btrfs_set_item_key_unsafe(root, path, &k2);
5624                 path->slots[0] = slot + 1;
5625                 btrfs_set_item_key_unsafe(root, path, &k1);
5626         }
5627         return 0;
5628 }
5629
5630 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5631 {
5632         struct extent_buffer *buf;
5633         struct btrfs_key k1, k2;
5634         int i;
5635         int level = path->lowest_level;
5636         int ret = -EIO;
5637
5638         buf = path->nodes[level];
5639         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5640                 if (level) {
5641                         btrfs_node_key_to_cpu(buf, &k1, i);
5642                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5643                 } else {
5644                         btrfs_item_key_to_cpu(buf, &k1, i);
5645                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5646                 }
5647                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5648                         continue;
5649                 ret = swap_values(root, path, buf, i);
5650                 if (ret)
5651                         break;
5652                 btrfs_mark_buffer_dirty(buf);
5653                 i = 0;
5654         }
5655         return ret;
5656 }
5657
5658 static int delete_bogus_item(struct btrfs_root *root,
5659                              struct btrfs_path *path,
5660                              struct extent_buffer *buf, int slot)
5661 {
5662         struct btrfs_key key;
5663         int nritems = btrfs_header_nritems(buf);
5664
5665         btrfs_item_key_to_cpu(buf, &key, slot);
5666
5667         /* These are all the keys we can deal with missing. */
5668         if (key.type != BTRFS_DIR_INDEX_KEY &&
5669             key.type != BTRFS_EXTENT_ITEM_KEY &&
5670             key.type != BTRFS_METADATA_ITEM_KEY &&
5671             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5672             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5673                 return -1;
5674
5675         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5676                (unsigned long long)key.objectid, key.type,
5677                (unsigned long long)key.offset, slot, buf->start);
5678         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5679                               btrfs_item_nr_offset(slot + 1),
5680                               sizeof(struct btrfs_item) *
5681                               (nritems - slot - 1));
5682         btrfs_set_header_nritems(buf, nritems - 1);
5683         if (slot == 0) {
5684                 struct btrfs_disk_key disk_key;
5685
5686                 btrfs_item_key(buf, &disk_key, 0);
5687                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5688         }
5689         btrfs_mark_buffer_dirty(buf);
5690         return 0;
5691 }
5692
5693 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5694 {
5695         struct extent_buffer *buf;
5696         int i;
5697         int ret = 0;
5698
5699         /* We should only get this for leaves */
5700         BUG_ON(path->lowest_level);
5701         buf = path->nodes[0];
5702 again:
5703         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5704                 unsigned int shift = 0, offset;
5705
5706                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5707                     BTRFS_LEAF_DATA_SIZE(root)) {
5708                         if (btrfs_item_end_nr(buf, i) >
5709                             BTRFS_LEAF_DATA_SIZE(root)) {
5710                                 ret = delete_bogus_item(root, path, buf, i);
5711                                 if (!ret)
5712                                         goto again;
5713                                 fprintf(stderr, "item is off the end of the "
5714                                         "leaf, can't fix\n");
5715                                 ret = -EIO;
5716                                 break;
5717                         }
5718                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5719                                 btrfs_item_end_nr(buf, i);
5720                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5721                            btrfs_item_offset_nr(buf, i - 1)) {
5722                         if (btrfs_item_end_nr(buf, i) >
5723                             btrfs_item_offset_nr(buf, i - 1)) {
5724                                 ret = delete_bogus_item(root, path, buf, i);
5725                                 if (!ret)
5726                                         goto again;
5727                                 fprintf(stderr, "items overlap, can't fix\n");
5728                                 ret = -EIO;
5729                                 break;
5730                         }
5731                         shift = btrfs_item_offset_nr(buf, i - 1) -
5732                                 btrfs_item_end_nr(buf, i);
5733                 }
5734                 if (!shift)
5735                         continue;
5736
5737                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5738                        i, shift, (unsigned long long)buf->start);
5739                 offset = btrfs_item_offset_nr(buf, i);
5740                 memmove_extent_buffer(buf,
5741                                       btrfs_leaf_data(buf) + offset + shift,
5742                                       btrfs_leaf_data(buf) + offset,
5743                                       btrfs_item_size_nr(buf, i));
5744                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5745                                       offset + shift);
5746                 btrfs_mark_buffer_dirty(buf);
5747         }
5748
5749         /*
5750          * We may have moved things, in which case we want to exit so we don't
5751          * write those changes out.  Once we have proper abort functionality in
5752          * progs this can be changed to something nicer.
5753          */
5754         BUG_ON(ret);
5755         return ret;
5756 }
5757
5758 /*
5759  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5760  * then just return -EIO.
5761  */
5762 static int try_to_fix_bad_block(struct btrfs_root *root,
5763                                 struct extent_buffer *buf,
5764                                 enum btrfs_tree_block_status status)
5765 {
5766         struct btrfs_trans_handle *trans;
5767         struct ulist *roots;
5768         struct ulist_node *node;
5769         struct btrfs_root *search_root;
5770         struct btrfs_path path;
5771         struct ulist_iterator iter;
5772         struct btrfs_key root_key, key;
5773         int ret;
5774
5775         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5776             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5777                 return -EIO;
5778
5779         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5780         if (ret)
5781                 return -EIO;
5782
5783         btrfs_init_path(&path);
5784         ULIST_ITER_INIT(&iter);
5785         while ((node = ulist_next(roots, &iter))) {
5786                 root_key.objectid = node->val;
5787                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5788                 root_key.offset = (u64)-1;
5789
5790                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5791                 if (IS_ERR(root)) {
5792                         ret = -EIO;
5793                         break;
5794                 }
5795
5796
5797                 trans = btrfs_start_transaction(search_root, 0);
5798                 if (IS_ERR(trans)) {
5799                         ret = PTR_ERR(trans);
5800                         break;
5801                 }
5802
5803                 path.lowest_level = btrfs_header_level(buf);
5804                 path.skip_check_block = 1;
5805                 if (path.lowest_level)
5806                         btrfs_node_key_to_cpu(buf, &key, 0);
5807                 else
5808                         btrfs_item_key_to_cpu(buf, &key, 0);
5809                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5810                 if (ret) {
5811                         ret = -EIO;
5812                         btrfs_commit_transaction(trans, search_root);
5813                         break;
5814                 }
5815                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5816                         ret = fix_key_order(search_root, &path);
5817                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5818                         ret = fix_item_offset(search_root, &path);
5819                 if (ret) {
5820                         btrfs_commit_transaction(trans, search_root);
5821                         break;
5822                 }
5823                 btrfs_release_path(&path);
5824                 btrfs_commit_transaction(trans, search_root);
5825         }
5826         ulist_free(roots);
5827         btrfs_release_path(&path);
5828         return ret;
5829 }
5830
5831 static int check_block(struct btrfs_root *root,
5832                        struct cache_tree *extent_cache,
5833                        struct extent_buffer *buf, u64 flags)
5834 {
5835         struct extent_record *rec;
5836         struct cache_extent *cache;
5837         struct btrfs_key key;
5838         enum btrfs_tree_block_status status;
5839         int ret = 0;
5840         int level;
5841
5842         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5843         if (!cache)
5844                 return 1;
5845         rec = container_of(cache, struct extent_record, cache);
5846         rec->generation = btrfs_header_generation(buf);
5847
5848         level = btrfs_header_level(buf);
5849         if (btrfs_header_nritems(buf) > 0) {
5850
5851                 if (level == 0)
5852                         btrfs_item_key_to_cpu(buf, &key, 0);
5853                 else
5854                         btrfs_node_key_to_cpu(buf, &key, 0);
5855
5856                 rec->info_objectid = key.objectid;
5857         }
5858         rec->info_level = level;
5859
5860         if (btrfs_is_leaf(buf))
5861                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5862         else
5863                 status = btrfs_check_node(root, &rec->parent_key, buf);
5864
5865         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5866                 if (repair)
5867                         status = try_to_fix_bad_block(root, buf, status);
5868                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5869                         ret = -EIO;
5870                         fprintf(stderr, "bad block %llu\n",
5871                                 (unsigned long long)buf->start);
5872                 } else {
5873                         /*
5874                          * Signal to callers we need to start the scan over
5875                          * again since we'll have cowed blocks.
5876                          */
5877                         ret = -EAGAIN;
5878                 }
5879         } else {
5880                 rec->content_checked = 1;
5881                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5882                         rec->owner_ref_checked = 1;
5883                 else {
5884                         ret = check_owner_ref(root, rec, buf);
5885                         if (!ret)
5886                                 rec->owner_ref_checked = 1;
5887                 }
5888         }
5889         if (!ret)
5890                 maybe_free_extent_rec(extent_cache, rec);
5891         return ret;
5892 }
5893
5894 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5895                                                 u64 parent, u64 root)
5896 {
5897         struct list_head *cur = rec->backrefs.next;
5898         struct extent_backref *node;
5899         struct tree_backref *back;
5900
5901         while(cur != &rec->backrefs) {
5902                 node = to_extent_backref(cur);
5903                 cur = cur->next;
5904                 if (node->is_data)
5905                         continue;
5906                 back = to_tree_backref(node);
5907                 if (parent > 0) {
5908                         if (!node->full_backref)
5909                                 continue;
5910                         if (parent == back->parent)
5911                                 return back;
5912                 } else {
5913                         if (node->full_backref)
5914                                 continue;
5915                         if (back->root == root)
5916                                 return back;
5917                 }
5918         }
5919         return NULL;
5920 }
5921
5922 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5923                                                 u64 parent, u64 root)
5924 {
5925         struct tree_backref *ref = malloc(sizeof(*ref));
5926
5927         if (!ref)
5928                 return NULL;
5929         memset(&ref->node, 0, sizeof(ref->node));
5930         if (parent > 0) {
5931                 ref->parent = parent;
5932                 ref->node.full_backref = 1;
5933         } else {
5934                 ref->root = root;
5935                 ref->node.full_backref = 0;
5936         }
5937         list_add_tail(&ref->node.list, &rec->backrefs);
5938
5939         return ref;
5940 }
5941
5942 static struct data_backref *find_data_backref(struct extent_record *rec,
5943                                                 u64 parent, u64 root,
5944                                                 u64 owner, u64 offset,
5945                                                 int found_ref,
5946                                                 u64 disk_bytenr, u64 bytes)
5947 {
5948         struct list_head *cur = rec->backrefs.next;
5949         struct extent_backref *node;
5950         struct data_backref *back;
5951
5952         while(cur != &rec->backrefs) {
5953                 node = to_extent_backref(cur);
5954                 cur = cur->next;
5955                 if (!node->is_data)
5956                         continue;
5957                 back = to_data_backref(node);
5958                 if (parent > 0) {
5959                         if (!node->full_backref)
5960                                 continue;
5961                         if (parent == back->parent)
5962                                 return back;
5963                 } else {
5964                         if (node->full_backref)
5965                                 continue;
5966                         if (back->root == root && back->owner == owner &&
5967                             back->offset == offset) {
5968                                 if (found_ref && node->found_ref &&
5969                                     (back->bytes != bytes ||
5970                                     back->disk_bytenr != disk_bytenr))
5971                                         continue;
5972                                 return back;
5973                         }
5974                 }
5975         }
5976         return NULL;
5977 }
5978
5979 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5980                                                 u64 parent, u64 root,
5981                                                 u64 owner, u64 offset,
5982                                                 u64 max_size)
5983 {
5984         struct data_backref *ref = malloc(sizeof(*ref));
5985
5986         if (!ref)
5987                 return NULL;
5988         memset(&ref->node, 0, sizeof(ref->node));
5989         ref->node.is_data = 1;
5990
5991         if (parent > 0) {
5992                 ref->parent = parent;
5993                 ref->owner = 0;
5994                 ref->offset = 0;
5995                 ref->node.full_backref = 1;
5996         } else {
5997                 ref->root = root;
5998                 ref->owner = owner;
5999                 ref->offset = offset;
6000                 ref->node.full_backref = 0;
6001         }
6002         ref->bytes = max_size;
6003         ref->found_ref = 0;
6004         ref->num_refs = 0;
6005         list_add_tail(&ref->node.list, &rec->backrefs);
6006         if (max_size > rec->max_size)
6007                 rec->max_size = max_size;
6008         return ref;
6009 }
6010
6011 /* Check if the type of extent matches with its chunk */
6012 static void check_extent_type(struct extent_record *rec)
6013 {
6014         struct btrfs_block_group_cache *bg_cache;
6015
6016         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6017         if (!bg_cache)
6018                 return;
6019
6020         /* data extent, check chunk directly*/
6021         if (!rec->metadata) {
6022                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6023                         rec->wrong_chunk_type = 1;
6024                 return;
6025         }
6026
6027         /* metadata extent, check the obvious case first */
6028         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6029                                  BTRFS_BLOCK_GROUP_METADATA))) {
6030                 rec->wrong_chunk_type = 1;
6031                 return;
6032         }
6033
6034         /*
6035          * Check SYSTEM extent, as it's also marked as metadata, we can only
6036          * make sure it's a SYSTEM extent by its backref
6037          */
6038         if (!list_empty(&rec->backrefs)) {
6039                 struct extent_backref *node;
6040                 struct tree_backref *tback;
6041                 u64 bg_type;
6042
6043                 node = to_extent_backref(rec->backrefs.next);
6044                 if (node->is_data) {
6045                         /* tree block shouldn't have data backref */
6046                         rec->wrong_chunk_type = 1;
6047                         return;
6048                 }
6049                 tback = container_of(node, struct tree_backref, node);
6050
6051                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6052                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6053                 else
6054                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6055                 if (!(bg_cache->flags & bg_type))
6056                         rec->wrong_chunk_type = 1;
6057         }
6058 }
6059
6060 /*
6061  * Allocate a new extent record, fill default values from @tmpl and insert int
6062  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6063  * the cache, otherwise it fails.
6064  */
6065 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6066                 struct extent_record *tmpl)
6067 {
6068         struct extent_record *rec;
6069         int ret = 0;
6070
6071         BUG_ON(tmpl->max_size == 0);
6072         rec = malloc(sizeof(*rec));
6073         if (!rec)
6074                 return -ENOMEM;
6075         rec->start = tmpl->start;
6076         rec->max_size = tmpl->max_size;
6077         rec->nr = max(tmpl->nr, tmpl->max_size);
6078         rec->found_rec = tmpl->found_rec;
6079         rec->content_checked = tmpl->content_checked;
6080         rec->owner_ref_checked = tmpl->owner_ref_checked;
6081         rec->num_duplicates = 0;
6082         rec->metadata = tmpl->metadata;
6083         rec->flag_block_full_backref = FLAG_UNSET;
6084         rec->bad_full_backref = 0;
6085         rec->crossing_stripes = 0;
6086         rec->wrong_chunk_type = 0;
6087         rec->is_root = tmpl->is_root;
6088         rec->refs = tmpl->refs;
6089         rec->extent_item_refs = tmpl->extent_item_refs;
6090         rec->parent_generation = tmpl->parent_generation;
6091         INIT_LIST_HEAD(&rec->backrefs);
6092         INIT_LIST_HEAD(&rec->dups);
6093         INIT_LIST_HEAD(&rec->list);
6094         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6095         rec->cache.start = tmpl->start;
6096         rec->cache.size = tmpl->nr;
6097         ret = insert_cache_extent(extent_cache, &rec->cache);
6098         if (ret) {
6099                 free(rec);
6100                 return ret;
6101         }
6102         bytes_used += rec->nr;
6103
6104         if (tmpl->metadata)
6105                 rec->crossing_stripes = check_crossing_stripes(global_info,
6106                                 rec->start, global_info->tree_root->nodesize);
6107         check_extent_type(rec);
6108         return ret;
6109 }
6110
6111 /*
6112  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6113  * some are hints:
6114  * - refs              - if found, increase refs
6115  * - is_root           - if found, set
6116  * - content_checked   - if found, set
6117  * - owner_ref_checked - if found, set
6118  *
6119  * If not found, create a new one, initialize and insert.
6120  */
6121 static int add_extent_rec(struct cache_tree *extent_cache,
6122                 struct extent_record *tmpl)
6123 {
6124         struct extent_record *rec;
6125         struct cache_extent *cache;
6126         int ret = 0;
6127         int dup = 0;
6128
6129         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6130         if (cache) {
6131                 rec = container_of(cache, struct extent_record, cache);
6132                 if (tmpl->refs)
6133                         rec->refs++;
6134                 if (rec->nr == 1)
6135                         rec->nr = max(tmpl->nr, tmpl->max_size);
6136
6137                 /*
6138                  * We need to make sure to reset nr to whatever the extent
6139                  * record says was the real size, this way we can compare it to
6140                  * the backrefs.
6141                  */
6142                 if (tmpl->found_rec) {
6143                         if (tmpl->start != rec->start || rec->found_rec) {
6144                                 struct extent_record *tmp;
6145
6146                                 dup = 1;
6147                                 if (list_empty(&rec->list))
6148                                         list_add_tail(&rec->list,
6149                                                       &duplicate_extents);
6150
6151                                 /*
6152                                  * We have to do this song and dance in case we
6153                                  * find an extent record that falls inside of
6154                                  * our current extent record but does not have
6155                                  * the same objectid.
6156                                  */
6157                                 tmp = malloc(sizeof(*tmp));
6158                                 if (!tmp)
6159                                         return -ENOMEM;
6160                                 tmp->start = tmpl->start;
6161                                 tmp->max_size = tmpl->max_size;
6162                                 tmp->nr = tmpl->nr;
6163                                 tmp->found_rec = 1;
6164                                 tmp->metadata = tmpl->metadata;
6165                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6166                                 INIT_LIST_HEAD(&tmp->list);
6167                                 list_add_tail(&tmp->list, &rec->dups);
6168                                 rec->num_duplicates++;
6169                         } else {
6170                                 rec->nr = tmpl->nr;
6171                                 rec->found_rec = 1;
6172                         }
6173                 }
6174
6175                 if (tmpl->extent_item_refs && !dup) {
6176                         if (rec->extent_item_refs) {
6177                                 fprintf(stderr, "block %llu rec "
6178                                         "extent_item_refs %llu, passed %llu\n",
6179                                         (unsigned long long)tmpl->start,
6180                                         (unsigned long long)
6181                                                         rec->extent_item_refs,
6182                                         (unsigned long long)tmpl->extent_item_refs);
6183                         }
6184                         rec->extent_item_refs = tmpl->extent_item_refs;
6185                 }
6186                 if (tmpl->is_root)
6187                         rec->is_root = 1;
6188                 if (tmpl->content_checked)
6189                         rec->content_checked = 1;
6190                 if (tmpl->owner_ref_checked)
6191                         rec->owner_ref_checked = 1;
6192                 memcpy(&rec->parent_key, &tmpl->parent_key,
6193                                 sizeof(tmpl->parent_key));
6194                 if (tmpl->parent_generation)
6195                         rec->parent_generation = tmpl->parent_generation;
6196                 if (rec->max_size < tmpl->max_size)
6197                         rec->max_size = tmpl->max_size;
6198
6199                 /*
6200                  * A metadata extent can't cross stripe_len boundary, otherwise
6201                  * kernel scrub won't be able to handle it.
6202                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6203                  * it.
6204                  */
6205                 if (tmpl->metadata)
6206                         rec->crossing_stripes = check_crossing_stripes(
6207                                         global_info, rec->start,
6208                                         global_info->tree_root->nodesize);
6209                 check_extent_type(rec);
6210                 maybe_free_extent_rec(extent_cache, rec);
6211                 return ret;
6212         }
6213
6214         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6215
6216         return ret;
6217 }
6218
6219 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6220                             u64 parent, u64 root, int found_ref)
6221 {
6222         struct extent_record *rec;
6223         struct tree_backref *back;
6224         struct cache_extent *cache;
6225         int ret;
6226
6227         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6228         if (!cache) {
6229                 struct extent_record tmpl;
6230
6231                 memset(&tmpl, 0, sizeof(tmpl));
6232                 tmpl.start = bytenr;
6233                 tmpl.nr = 1;
6234                 tmpl.metadata = 1;
6235                 tmpl.max_size = 1;
6236
6237                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6238                 if (ret)
6239                         return ret;
6240
6241                 /* really a bug in cache_extent implement now */
6242                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6243                 if (!cache)
6244                         return -ENOENT;
6245         }
6246
6247         rec = container_of(cache, struct extent_record, cache);
6248         if (rec->start != bytenr) {
6249                 /*
6250                  * Several cause, from unaligned bytenr to over lapping extents
6251                  */
6252                 return -EEXIST;
6253         }
6254
6255         back = find_tree_backref(rec, parent, root);
6256         if (!back) {
6257                 back = alloc_tree_backref(rec, parent, root);
6258                 if (!back)
6259                         return -ENOMEM;
6260         }
6261
6262         if (found_ref) {
6263                 if (back->node.found_ref) {
6264                         fprintf(stderr, "Extent back ref already exists "
6265                                 "for %llu parent %llu root %llu \n",
6266                                 (unsigned long long)bytenr,
6267                                 (unsigned long long)parent,
6268                                 (unsigned long long)root);
6269                 }
6270                 back->node.found_ref = 1;
6271         } else {
6272                 if (back->node.found_extent_tree) {
6273                         fprintf(stderr, "Extent back ref already exists "
6274                                 "for %llu parent %llu root %llu \n",
6275                                 (unsigned long long)bytenr,
6276                                 (unsigned long long)parent,
6277                                 (unsigned long long)root);
6278                 }
6279                 back->node.found_extent_tree = 1;
6280         }
6281         check_extent_type(rec);
6282         maybe_free_extent_rec(extent_cache, rec);
6283         return 0;
6284 }
6285
6286 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6287                             u64 parent, u64 root, u64 owner, u64 offset,
6288                             u32 num_refs, int found_ref, u64 max_size)
6289 {
6290         struct extent_record *rec;
6291         struct data_backref *back;
6292         struct cache_extent *cache;
6293         int ret;
6294
6295         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6296         if (!cache) {
6297                 struct extent_record tmpl;
6298
6299                 memset(&tmpl, 0, sizeof(tmpl));
6300                 tmpl.start = bytenr;
6301                 tmpl.nr = 1;
6302                 tmpl.max_size = max_size;
6303
6304                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6305                 if (ret)
6306                         return ret;
6307
6308                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6309                 if (!cache)
6310                         abort();
6311         }
6312
6313         rec = container_of(cache, struct extent_record, cache);
6314         if (rec->max_size < max_size)
6315                 rec->max_size = max_size;
6316
6317         /*
6318          * If found_ref is set then max_size is the real size and must match the
6319          * existing refs.  So if we have already found a ref then we need to
6320          * make sure that this ref matches the existing one, otherwise we need
6321          * to add a new backref so we can notice that the backrefs don't match
6322          * and we need to figure out who is telling the truth.  This is to
6323          * account for that awful fsync bug I introduced where we'd end up with
6324          * a btrfs_file_extent_item that would have its length include multiple
6325          * prealloc extents or point inside of a prealloc extent.
6326          */
6327         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6328                                  bytenr, max_size);
6329         if (!back) {
6330                 back = alloc_data_backref(rec, parent, root, owner, offset,
6331                                           max_size);
6332                 BUG_ON(!back);
6333         }
6334
6335         if (found_ref) {
6336                 BUG_ON(num_refs != 1);
6337                 if (back->node.found_ref)
6338                         BUG_ON(back->bytes != max_size);
6339                 back->node.found_ref = 1;
6340                 back->found_ref += 1;
6341                 back->bytes = max_size;
6342                 back->disk_bytenr = bytenr;
6343                 rec->refs += 1;
6344                 rec->content_checked = 1;
6345                 rec->owner_ref_checked = 1;
6346         } else {
6347                 if (back->node.found_extent_tree) {
6348                         fprintf(stderr, "Extent back ref already exists "
6349                                 "for %llu parent %llu root %llu "
6350                                 "owner %llu offset %llu num_refs %lu\n",
6351                                 (unsigned long long)bytenr,
6352                                 (unsigned long long)parent,
6353                                 (unsigned long long)root,
6354                                 (unsigned long long)owner,
6355                                 (unsigned long long)offset,
6356                                 (unsigned long)num_refs);
6357                 }
6358                 back->num_refs = num_refs;
6359                 back->node.found_extent_tree = 1;
6360         }
6361         maybe_free_extent_rec(extent_cache, rec);
6362         return 0;
6363 }
6364
6365 static int add_pending(struct cache_tree *pending,
6366                        struct cache_tree *seen, u64 bytenr, u32 size)
6367 {
6368         int ret;
6369         ret = add_cache_extent(seen, bytenr, size);
6370         if (ret)
6371                 return ret;
6372         add_cache_extent(pending, bytenr, size);
6373         return 0;
6374 }
6375
6376 static int pick_next_pending(struct cache_tree *pending,
6377                         struct cache_tree *reada,
6378                         struct cache_tree *nodes,
6379                         u64 last, struct block_info *bits, int bits_nr,
6380                         int *reada_bits)
6381 {
6382         unsigned long node_start = last;
6383         struct cache_extent *cache;
6384         int ret;
6385
6386         cache = search_cache_extent(reada, 0);
6387         if (cache) {
6388                 bits[0].start = cache->start;
6389                 bits[0].size = cache->size;
6390                 *reada_bits = 1;
6391                 return 1;
6392         }
6393         *reada_bits = 0;
6394         if (node_start > 32768)
6395                 node_start -= 32768;
6396
6397         cache = search_cache_extent(nodes, node_start);
6398         if (!cache)
6399                 cache = search_cache_extent(nodes, 0);
6400
6401         if (!cache) {
6402                  cache = search_cache_extent(pending, 0);
6403                  if (!cache)
6404                          return 0;
6405                  ret = 0;
6406                  do {
6407                          bits[ret].start = cache->start;
6408                          bits[ret].size = cache->size;
6409                          cache = next_cache_extent(cache);
6410                          ret++;
6411                  } while (cache && ret < bits_nr);
6412                  return ret;
6413         }
6414
6415         ret = 0;
6416         do {
6417                 bits[ret].start = cache->start;
6418                 bits[ret].size = cache->size;
6419                 cache = next_cache_extent(cache);
6420                 ret++;
6421         } while (cache && ret < bits_nr);
6422
6423         if (bits_nr - ret > 8) {
6424                 u64 lookup = bits[0].start + bits[0].size;
6425                 struct cache_extent *next;
6426                 next = search_cache_extent(pending, lookup);
6427                 while(next) {
6428                         if (next->start - lookup > 32768)
6429                                 break;
6430                         bits[ret].start = next->start;
6431                         bits[ret].size = next->size;
6432                         lookup = next->start + next->size;
6433                         ret++;
6434                         if (ret == bits_nr)
6435                                 break;
6436                         next = next_cache_extent(next);
6437                         if (!next)
6438                                 break;
6439                 }
6440         }
6441         return ret;
6442 }
6443
6444 static void free_chunk_record(struct cache_extent *cache)
6445 {
6446         struct chunk_record *rec;
6447
6448         rec = container_of(cache, struct chunk_record, cache);
6449         list_del_init(&rec->list);
6450         list_del_init(&rec->dextents);
6451         free(rec);
6452 }
6453
6454 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6455 {
6456         cache_tree_free_extents(chunk_cache, free_chunk_record);
6457 }
6458
6459 static void free_device_record(struct rb_node *node)
6460 {
6461         struct device_record *rec;
6462
6463         rec = container_of(node, struct device_record, node);
6464         free(rec);
6465 }
6466
6467 FREE_RB_BASED_TREE(device_cache, free_device_record);
6468
6469 int insert_block_group_record(struct block_group_tree *tree,
6470                               struct block_group_record *bg_rec)
6471 {
6472         int ret;
6473
6474         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6475         if (ret)
6476                 return ret;
6477
6478         list_add_tail(&bg_rec->list, &tree->block_groups);
6479         return 0;
6480 }
6481
6482 static void free_block_group_record(struct cache_extent *cache)
6483 {
6484         struct block_group_record *rec;
6485
6486         rec = container_of(cache, struct block_group_record, cache);
6487         list_del_init(&rec->list);
6488         free(rec);
6489 }
6490
6491 void free_block_group_tree(struct block_group_tree *tree)
6492 {
6493         cache_tree_free_extents(&tree->tree, free_block_group_record);
6494 }
6495
6496 int insert_device_extent_record(struct device_extent_tree *tree,
6497                                 struct device_extent_record *de_rec)
6498 {
6499         int ret;
6500
6501         /*
6502          * Device extent is a bit different from the other extents, because
6503          * the extents which belong to the different devices may have the
6504          * same start and size, so we need use the special extent cache
6505          * search/insert functions.
6506          */
6507         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6508         if (ret)
6509                 return ret;
6510
6511         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6512         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6513         return 0;
6514 }
6515
6516 static void free_device_extent_record(struct cache_extent *cache)
6517 {
6518         struct device_extent_record *rec;
6519
6520         rec = container_of(cache, struct device_extent_record, cache);
6521         if (!list_empty(&rec->chunk_list))
6522                 list_del_init(&rec->chunk_list);
6523         if (!list_empty(&rec->device_list))
6524                 list_del_init(&rec->device_list);
6525         free(rec);
6526 }
6527
6528 void free_device_extent_tree(struct device_extent_tree *tree)
6529 {
6530         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6531 }
6532
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent reference item at @slot of @leaf.
 *
 * A reference whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded
 * as a tree backref, anything else as a data backref carrying the item's
 * ref count.  In both cases the key's objectid is used as bytenr and its
 * offset as parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6553
6554 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6555                                             struct btrfs_key *key,
6556                                             int slot)
6557 {
6558         struct btrfs_chunk *ptr;
6559         struct chunk_record *rec;
6560         int num_stripes, i;
6561
6562         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6563         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6564
6565         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6566         if (!rec) {
6567                 fprintf(stderr, "memory allocation failed\n");
6568                 exit(-1);
6569         }
6570
6571         INIT_LIST_HEAD(&rec->list);
6572         INIT_LIST_HEAD(&rec->dextents);
6573         rec->bg_rec = NULL;
6574
6575         rec->cache.start = key->offset;
6576         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6577
6578         rec->generation = btrfs_header_generation(leaf);
6579
6580         rec->objectid = key->objectid;
6581         rec->type = key->type;
6582         rec->offset = key->offset;
6583
6584         rec->length = rec->cache.size;
6585         rec->owner = btrfs_chunk_owner(leaf, ptr);
6586         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6587         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6588         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6589         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6590         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6591         rec->num_stripes = num_stripes;
6592         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6593
6594         for (i = 0; i < rec->num_stripes; ++i) {
6595                 rec->stripes[i].devid =
6596                         btrfs_stripe_devid_nr(leaf, ptr, i);
6597                 rec->stripes[i].offset =
6598                         btrfs_stripe_offset_nr(leaf, ptr, i);
6599                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6600                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6601                                 BTRFS_UUID_SIZE);
6602         }
6603
6604         return rec;
6605 }
6606
6607 static int process_chunk_item(struct cache_tree *chunk_cache,
6608                               struct btrfs_key *key, struct extent_buffer *eb,
6609                               int slot)
6610 {
6611         struct chunk_record *rec;
6612         struct btrfs_chunk *chunk;
6613         int ret = 0;
6614
6615         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6616         /*
6617          * Do extra check for this chunk item,
6618          *
6619          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6620          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6621          * and owner<->key_type check.
6622          */
6623         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6624                                       key->offset);
6625         if (ret < 0) {
6626                 error("chunk(%llu, %llu) is not valid, ignore it",
6627                       key->offset, btrfs_chunk_length(eb, chunk));
6628                 return 0;
6629         }
6630         rec = btrfs_new_chunk_record(eb, key, slot);
6631         ret = insert_cache_extent(chunk_cache, &rec->cache);
6632         if (ret) {
6633                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6634                         rec->offset, rec->length);
6635                 free(rec);
6636         }
6637
6638         return ret;
6639 }
6640
6641 static int process_device_item(struct rb_root *dev_cache,
6642                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6643 {
6644         struct btrfs_dev_item *ptr;
6645         struct device_record *rec;
6646         int ret = 0;
6647
6648         ptr = btrfs_item_ptr(eb,
6649                 slot, struct btrfs_dev_item);
6650
6651         rec = malloc(sizeof(*rec));
6652         if (!rec) {
6653                 fprintf(stderr, "memory allocation failed\n");
6654                 return -ENOMEM;
6655         }
6656
6657         rec->devid = key->offset;
6658         rec->generation = btrfs_header_generation(eb);
6659
6660         rec->objectid = key->objectid;
6661         rec->type = key->type;
6662         rec->offset = key->offset;
6663
6664         rec->devid = btrfs_device_id(eb, ptr);
6665         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6666         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6667
6668         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6669         if (ret) {
6670                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6671                 free(rec);
6672         }
6673
6674         return ret;
6675 }
6676
6677 struct block_group_record *
6678 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6679                              int slot)
6680 {
6681         struct btrfs_block_group_item *ptr;
6682         struct block_group_record *rec;
6683
6684         rec = calloc(1, sizeof(*rec));
6685         if (!rec) {
6686                 fprintf(stderr, "memory allocation failed\n");
6687                 exit(-1);
6688         }
6689
6690         rec->cache.start = key->objectid;
6691         rec->cache.size = key->offset;
6692
6693         rec->generation = btrfs_header_generation(leaf);
6694
6695         rec->objectid = key->objectid;
6696         rec->type = key->type;
6697         rec->offset = key->offset;
6698
6699         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6700         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6701
6702         INIT_LIST_HEAD(&rec->list);
6703
6704         return rec;
6705 }
6706
6707 static int process_block_group_item(struct block_group_tree *block_group_cache,
6708                                     struct btrfs_key *key,
6709                                     struct extent_buffer *eb, int slot)
6710 {
6711         struct block_group_record *rec;
6712         int ret = 0;
6713
6714         rec = btrfs_new_block_group_record(eb, key, slot);
6715         ret = insert_block_group_record(block_group_cache, rec);
6716         if (ret) {
6717                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6718                         rec->objectid, rec->offset);
6719                 free(rec);
6720         }
6721
6722         return ret;
6723 }
6724
6725 struct device_extent_record *
6726 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6727                                struct btrfs_key *key, int slot)
6728 {
6729         struct device_extent_record *rec;
6730         struct btrfs_dev_extent *ptr;
6731
6732         rec = calloc(1, sizeof(*rec));
6733         if (!rec) {
6734                 fprintf(stderr, "memory allocation failed\n");
6735                 exit(-1);
6736         }
6737
6738         rec->cache.objectid = key->objectid;
6739         rec->cache.start = key->offset;
6740
6741         rec->generation = btrfs_header_generation(leaf);
6742
6743         rec->objectid = key->objectid;
6744         rec->type = key->type;
6745         rec->offset = key->offset;
6746
6747         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6748         rec->chunk_objecteid =
6749                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6750         rec->chunk_offset =
6751                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6752         rec->length = btrfs_dev_extent_length(leaf, ptr);
6753         rec->cache.size = rec->length;
6754
6755         INIT_LIST_HEAD(&rec->chunk_list);
6756         INIT_LIST_HEAD(&rec->device_list);
6757
6758         return rec;
6759 }
6760
6761 static int
6762 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6763                            struct btrfs_key *key, struct extent_buffer *eb,
6764                            int slot)
6765 {
6766         struct device_extent_record *rec;
6767         int ret;
6768
6769         rec = btrfs_new_device_extent_record(eb, key, slot);
6770         ret = insert_device_extent_record(dev_extent_cache, rec);
6771         if (ret) {
6772                 fprintf(stderr,
6773                         "Device extent[%llu, %llu, %llu] existed.\n",
6774                         rec->objectid, rec->offset, rec->length);
6775                 free(rec);
6776         }
6777
6778         return ret;
6779 }
6780
6781 static int process_extent_item(struct btrfs_root *root,
6782                                struct cache_tree *extent_cache,
6783                                struct extent_buffer *eb, int slot)
6784 {
6785         struct btrfs_extent_item *ei;
6786         struct btrfs_extent_inline_ref *iref;
6787         struct btrfs_extent_data_ref *dref;
6788         struct btrfs_shared_data_ref *sref;
6789         struct btrfs_key key;
6790         struct extent_record tmpl;
6791         unsigned long end;
6792         unsigned long ptr;
6793         int ret;
6794         int type;
6795         u32 item_size = btrfs_item_size_nr(eb, slot);
6796         u64 refs = 0;
6797         u64 offset;
6798         u64 num_bytes;
6799         int metadata = 0;
6800
6801         btrfs_item_key_to_cpu(eb, &key, slot);
6802
6803         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6804                 metadata = 1;
6805                 num_bytes = root->nodesize;
6806         } else {
6807                 num_bytes = key.offset;
6808         }
6809
6810         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6811                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6812                       key.objectid, root->sectorsize);
6813                 return -EIO;
6814         }
6815         if (item_size < sizeof(*ei)) {
6816 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6817                 struct btrfs_extent_item_v0 *ei0;
6818                 BUG_ON(item_size != sizeof(*ei0));
6819                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6820                 refs = btrfs_extent_refs_v0(eb, ei0);
6821 #else
6822                 BUG();
6823 #endif
6824                 memset(&tmpl, 0, sizeof(tmpl));
6825                 tmpl.start = key.objectid;
6826                 tmpl.nr = num_bytes;
6827                 tmpl.extent_item_refs = refs;
6828                 tmpl.metadata = metadata;
6829                 tmpl.found_rec = 1;
6830                 tmpl.max_size = num_bytes;
6831
6832                 return add_extent_rec(extent_cache, &tmpl);
6833         }
6834
6835         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6836         refs = btrfs_extent_refs(eb, ei);
6837         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6838                 metadata = 1;
6839         else
6840                 metadata = 0;
6841         if (metadata && num_bytes != root->nodesize) {
6842                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6843                       num_bytes, root->nodesize);
6844                 return -EIO;
6845         }
6846         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6847                 error("ignore invalid data extent, length %llu is not aligned to %u",
6848                       num_bytes, root->sectorsize);
6849                 return -EIO;
6850         }
6851
6852         memset(&tmpl, 0, sizeof(tmpl));
6853         tmpl.start = key.objectid;
6854         tmpl.nr = num_bytes;
6855         tmpl.extent_item_refs = refs;
6856         tmpl.metadata = metadata;
6857         tmpl.found_rec = 1;
6858         tmpl.max_size = num_bytes;
6859         add_extent_rec(extent_cache, &tmpl);
6860
6861         ptr = (unsigned long)(ei + 1);
6862         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6863             key.type == BTRFS_EXTENT_ITEM_KEY)
6864                 ptr += sizeof(struct btrfs_tree_block_info);
6865
6866         end = (unsigned long)ei + item_size;
6867         while (ptr < end) {
6868                 iref = (struct btrfs_extent_inline_ref *)ptr;
6869                 type = btrfs_extent_inline_ref_type(eb, iref);
6870                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6871                 switch (type) {
6872                 case BTRFS_TREE_BLOCK_REF_KEY:
6873                         ret = add_tree_backref(extent_cache, key.objectid,
6874                                         0, offset, 0);
6875                         if (ret < 0)
6876                                 error(
6877                         "add_tree_backref failed (extent items tree block): %s",
6878                                       strerror(-ret));
6879                         break;
6880                 case BTRFS_SHARED_BLOCK_REF_KEY:
6881                         ret = add_tree_backref(extent_cache, key.objectid,
6882                                         offset, 0, 0);
6883                         if (ret < 0)
6884                                 error(
6885                         "add_tree_backref failed (extent items shared block): %s",
6886                                       strerror(-ret));
6887                         break;
6888                 case BTRFS_EXTENT_DATA_REF_KEY:
6889                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6890                         add_data_backref(extent_cache, key.objectid, 0,
6891                                         btrfs_extent_data_ref_root(eb, dref),
6892                                         btrfs_extent_data_ref_objectid(eb,
6893                                                                        dref),
6894                                         btrfs_extent_data_ref_offset(eb, dref),
6895                                         btrfs_extent_data_ref_count(eb, dref),
6896                                         0, num_bytes);
6897                         break;
6898                 case BTRFS_SHARED_DATA_REF_KEY:
6899                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6900                         add_data_backref(extent_cache, key.objectid, offset,
6901                                         0, 0, 0,
6902                                         btrfs_shared_data_ref_count(eb, sref),
6903                                         0, num_bytes);
6904                         break;
6905                 default:
6906                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6907                                 key.objectid, key.type, num_bytes);
6908                         goto out;
6909                 }
6910                 ptr += btrfs_extent_inline_ref_size(type);
6911         }
6912         WARN_ON(ptr > end);
6913 out:
6914         return 0;
6915 }
6916
6917 static int check_cache_range(struct btrfs_root *root,
6918                              struct btrfs_block_group_cache *cache,
6919                              u64 offset, u64 bytes)
6920 {
6921         struct btrfs_free_space *entry;
6922         u64 *logical;
6923         u64 bytenr;
6924         int stripe_len;
6925         int i, nr, ret;
6926
6927         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6928                 bytenr = btrfs_sb_offset(i);
6929                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6930                                        cache->key.objectid, bytenr, 0,
6931                                        &logical, &nr, &stripe_len);
6932                 if (ret)
6933                         return ret;
6934
6935                 while (nr--) {
6936                         if (logical[nr] + stripe_len <= offset)
6937                                 continue;
6938                         if (offset + bytes <= logical[nr])
6939                                 continue;
6940                         if (logical[nr] == offset) {
6941                                 if (stripe_len >= bytes) {
6942                                         free(logical);
6943                                         return 0;
6944                                 }
6945                                 bytes -= stripe_len;
6946                                 offset += stripe_len;
6947                         } else if (logical[nr] < offset) {
6948                                 if (logical[nr] + stripe_len >=
6949                                     offset + bytes) {
6950                                         free(logical);
6951                                         return 0;
6952                                 }
6953                                 bytes = (offset + bytes) -
6954                                         (logical[nr] + stripe_len);
6955                                 offset = logical[nr] + stripe_len;
6956                         } else {
6957                                 /*
6958                                  * Could be tricky, the super may land in the
6959                                  * middle of the area we're checking.  First
6960                                  * check the easiest case, it's at the end.
6961                                  */
6962                                 if (logical[nr] + stripe_len >=
6963                                     bytes + offset) {
6964                                         bytes = logical[nr] - offset;
6965                                         continue;
6966                                 }
6967
6968                                 /* Check the left side */
6969                                 ret = check_cache_range(root, cache,
6970                                                         offset,
6971                                                         logical[nr] - offset);
6972                                 if (ret) {
6973                                         free(logical);
6974                                         return ret;
6975                                 }
6976
6977                                 /* Now we continue with the right side */
6978                                 bytes = (offset + bytes) -
6979                                         (logical[nr] + stripe_len);
6980                                 offset = logical[nr] + stripe_len;
6981                         }
6982                 }
6983
6984                 free(logical);
6985         }
6986
6987         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6988         if (!entry) {
6989                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6990                         offset, offset+bytes);
6991                 return -EINVAL;
6992         }
6993
6994         if (entry->offset != offset) {
6995                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6996                         entry->offset);
6997                 return -EINVAL;
6998         }
6999
7000         if (entry->bytes != bytes) {
7001                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7002                         bytes, entry->bytes, offset);
7003                 return -EINVAL;
7004         }
7005
7006         unlink_free_space(cache->free_space_ctl, entry);
7007         free(entry);
7008         return 0;
7009 }
7010
7011 static int verify_space_cache(struct btrfs_root *root,
7012                               struct btrfs_block_group_cache *cache)
7013 {
7014         struct btrfs_path path;
7015         struct extent_buffer *leaf;
7016         struct btrfs_key key;
7017         u64 last;
7018         int ret = 0;
7019
7020         root = root->fs_info->extent_root;
7021
7022         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7023
7024         btrfs_init_path(&path);
7025         key.objectid = last;
7026         key.offset = 0;
7027         key.type = BTRFS_EXTENT_ITEM_KEY;
7028         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7029         if (ret < 0)
7030                 goto out;
7031         ret = 0;
7032         while (1) {
7033                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7034                         ret = btrfs_next_leaf(root, &path);
7035                         if (ret < 0)
7036                                 goto out;
7037                         if (ret > 0) {
7038                                 ret = 0;
7039                                 break;
7040                         }
7041                 }
7042                 leaf = path.nodes[0];
7043                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7044                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7045                         break;
7046                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7047                     key.type != BTRFS_METADATA_ITEM_KEY) {
7048                         path.slots[0]++;
7049                         continue;
7050                 }
7051
7052                 if (last == key.objectid) {
7053                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7054                                 last = key.objectid + key.offset;
7055                         else
7056                                 last = key.objectid + root->nodesize;
7057                         path.slots[0]++;
7058                         continue;
7059                 }
7060
7061                 ret = check_cache_range(root, cache, last,
7062                                         key.objectid - last);
7063                 if (ret)
7064                         break;
7065                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7066                         last = key.objectid + key.offset;
7067                 else
7068                         last = key.objectid + root->nodesize;
7069                 path.slots[0]++;
7070         }
7071
7072         if (last < cache->key.objectid + cache->key.offset)
7073                 ret = check_cache_range(root, cache, last,
7074                                         cache->key.objectid +
7075                                         cache->key.offset - last);
7076
7077 out:
7078         btrfs_release_path(&path);
7079
7080         if (!ret &&
7081             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7082                 fprintf(stderr, "There are still entries left in the space "
7083                         "cache\n");
7084                 ret = -EINVAL;
7085         }
7086
7087         return ret;
7088 }
7089
7090 static int check_space_cache(struct btrfs_root *root)
7091 {
7092         struct btrfs_block_group_cache *cache;
7093         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7094         int ret;
7095         int error = 0;
7096
7097         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7098             btrfs_super_generation(root->fs_info->super_copy) !=
7099             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7100                 printf("cache and super generation don't match, space cache "
7101                        "will be invalidated\n");
7102                 return 0;
7103         }
7104
7105         if (ctx.progress_enabled) {
7106                 ctx.tp = TASK_FREE_SPACE;
7107                 task_start(ctx.info);
7108         }
7109
7110         while (1) {
7111                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7112                 if (!cache)
7113                         break;
7114
7115                 start = cache->key.objectid + cache->key.offset;
7116                 if (!cache->free_space_ctl) {
7117                         if (btrfs_init_free_space_ctl(cache,
7118                                                       root->sectorsize)) {
7119                                 ret = -ENOMEM;
7120                                 break;
7121                         }
7122                 } else {
7123                         btrfs_remove_free_space_cache(cache);
7124                 }
7125
7126                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7127                         ret = exclude_super_stripes(root, cache);
7128                         if (ret) {
7129                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7130                                         strerror(-ret));
7131                                 error++;
7132                                 continue;
7133                         }
7134                         ret = load_free_space_tree(root->fs_info, cache);
7135                         free_excluded_extents(root, cache);
7136                         if (ret < 0) {
7137                                 fprintf(stderr, "could not load free space tree: %s\n",
7138                                         strerror(-ret));
7139                                 error++;
7140                                 continue;
7141                         }
7142                         error += ret;
7143                 } else {
7144                         ret = load_free_space_cache(root->fs_info, cache);
7145                         if (!ret)
7146                                 continue;
7147                 }
7148
7149                 ret = verify_space_cache(root, cache);
7150                 if (ret) {
7151                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7152                                 cache->key.objectid);
7153                         error++;
7154                 }
7155         }
7156
7157         task_stop(ctx.info);
7158
7159         return error ? -EINVAL : 0;
7160 }
7161
7162 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7163                         u64 num_bytes, unsigned long leaf_offset,
7164                         struct extent_buffer *eb) {
7165
7166         u64 offset = 0;
7167         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7168         char *data;
7169         unsigned long csum_offset;
7170         u32 csum;
7171         u32 csum_expected;
7172         u64 read_len;
7173         u64 data_checked = 0;
7174         u64 tmp;
7175         int ret = 0;
7176         int mirror;
7177         int num_copies;
7178
7179         if (num_bytes % root->sectorsize)
7180                 return -EINVAL;
7181
7182         data = malloc(num_bytes);
7183         if (!data)
7184                 return -ENOMEM;
7185
7186         while (offset < num_bytes) {
7187                 mirror = 0;
7188 again:
7189                 read_len = num_bytes - offset;
7190                 /* read as much space once a time */
7191                 ret = read_extent_data(root, data + offset,
7192                                 bytenr + offset, &read_len, mirror);
7193                 if (ret)
7194                         goto out;
7195                 data_checked = 0;
7196                 /* verify every 4k data's checksum */
7197                 while (data_checked < read_len) {
7198                         csum = ~(u32)0;
7199                         tmp = offset + data_checked;
7200
7201                         csum = btrfs_csum_data((char *)data + tmp,
7202                                                csum, root->sectorsize);
7203                         btrfs_csum_final(csum, (u8 *)&csum);
7204
7205                         csum_offset = leaf_offset +
7206                                  tmp / root->sectorsize * csum_size;
7207                         read_extent_buffer(eb, (char *)&csum_expected,
7208                                            csum_offset, csum_size);
7209                         /* try another mirror */
7210                         if (csum != csum_expected) {
7211                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7212                                                 mirror, bytenr + tmp,
7213                                                 csum, csum_expected);
7214                                 num_copies = btrfs_num_copies(
7215                                                 &root->fs_info->mapping_tree,
7216                                                 bytenr, num_bytes);
7217                                 if (mirror < num_copies - 1) {
7218                                         mirror += 1;
7219                                         goto again;
7220                                 }
7221                         }
7222                         data_checked += root->sectorsize;
7223                 }
7224                 offset += read_len;
7225         }
7226 out:
7227         free(data);
7228         return ret;
7229 }
7230
7231 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7232                                u64 num_bytes)
7233 {
7234         struct btrfs_path path;
7235         struct extent_buffer *leaf;
7236         struct btrfs_key key;
7237         int ret;
7238
7239         btrfs_init_path(&path);
7240         key.objectid = bytenr;
7241         key.type = BTRFS_EXTENT_ITEM_KEY;
7242         key.offset = (u64)-1;
7243
7244 again:
7245         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7246                                 0, 0);
7247         if (ret < 0) {
7248                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7249                 btrfs_release_path(&path);
7250                 return ret;
7251         } else if (ret) {
7252                 if (path.slots[0] > 0) {
7253                         path.slots[0]--;
7254                 } else {
7255                         ret = btrfs_prev_leaf(root, &path);
7256                         if (ret < 0) {
7257                                 goto out;
7258                         } else if (ret > 0) {
7259                                 ret = 0;
7260                                 goto out;
7261                         }
7262                 }
7263         }
7264
7265         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7266
7267         /*
7268          * Block group items come before extent items if they have the same
7269          * bytenr, so walk back one more just in case.  Dear future traveller,
7270          * first congrats on mastering time travel.  Now if it's not too much
7271          * trouble could you go back to 2006 and tell Chris to make the
7272          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7273          * EXTENT_ITEM_KEY please?
7274          */
7275         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7276                 if (path.slots[0] > 0) {
7277                         path.slots[0]--;
7278                 } else {
7279                         ret = btrfs_prev_leaf(root, &path);
7280                         if (ret < 0) {
7281                                 goto out;
7282                         } else if (ret > 0) {
7283                                 ret = 0;
7284                                 goto out;
7285                         }
7286                 }
7287                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7288         }
7289
7290         while (num_bytes) {
7291                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7292                         ret = btrfs_next_leaf(root, &path);
7293                         if (ret < 0) {
7294                                 fprintf(stderr, "Error going to next leaf "
7295                                         "%d\n", ret);
7296                                 btrfs_release_path(&path);
7297                                 return ret;
7298                         } else if (ret) {
7299                                 break;
7300                         }
7301                 }
7302                 leaf = path.nodes[0];
7303                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7304                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7305                         path.slots[0]++;
7306                         continue;
7307                 }
7308                 if (key.objectid + key.offset < bytenr) {
7309                         path.slots[0]++;
7310                         continue;
7311                 }
7312                 if (key.objectid > bytenr + num_bytes)
7313                         break;
7314
7315                 if (key.objectid == bytenr) {
7316                         if (key.offset >= num_bytes) {
7317                                 num_bytes = 0;
7318                                 break;
7319                         }
7320                         num_bytes -= key.offset;
7321                         bytenr += key.offset;
7322                 } else if (key.objectid < bytenr) {
7323                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7324                                 num_bytes = 0;
7325                                 break;
7326                         }
7327                         num_bytes = (bytenr + num_bytes) -
7328                                 (key.objectid + key.offset);
7329                         bytenr = key.objectid + key.offset;
7330                 } else {
7331                         if (key.objectid + key.offset < bytenr + num_bytes) {
7332                                 u64 new_start = key.objectid + key.offset;
7333                                 u64 new_bytes = bytenr + num_bytes - new_start;
7334
7335                                 /*
7336                                  * Weird case, the extent is in the middle of
7337                                  * our range, we'll have to search one side
7338                                  * and then the other.  Not sure if this happens
7339                                  * in real life, but no harm in coding it up
7340                                  * anyway just in case.
7341                                  */
7342                                 btrfs_release_path(&path);
7343                                 ret = check_extent_exists(root, new_start,
7344                                                           new_bytes);
7345                                 if (ret) {
7346                                         fprintf(stderr, "Right section didn't "
7347                                                 "have a record\n");
7348                                         break;
7349                                 }
7350                                 num_bytes = key.objectid - bytenr;
7351                                 goto again;
7352                         }
7353                         num_bytes = key.objectid - bytenr;
7354                 }
7355                 path.slots[0]++;
7356         }
7357         ret = 0;
7358
7359 out:
7360         if (num_bytes && !ret) {
7361                 fprintf(stderr, "There are no extents for csum range "
7362                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7363                 ret = 1;
7364         }
7365
7366         btrfs_release_path(&path);
7367         return ret;
7368 }
7369
7370 static int check_csums(struct btrfs_root *root)
7371 {
7372         struct btrfs_path path;
7373         struct extent_buffer *leaf;
7374         struct btrfs_key key;
7375         u64 offset = 0, num_bytes = 0;
7376         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7377         int errors = 0;
7378         int ret;
7379         u64 data_len;
7380         unsigned long leaf_offset;
7381
7382         root = root->fs_info->csum_root;
7383         if (!extent_buffer_uptodate(root->node)) {
7384                 fprintf(stderr, "No valid csum tree found\n");
7385                 return -ENOENT;
7386         }
7387
7388         btrfs_init_path(&path);
7389         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7390         key.type = BTRFS_EXTENT_CSUM_KEY;
7391         key.offset = 0;
7392         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7393         if (ret < 0) {
7394                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7395                 btrfs_release_path(&path);
7396                 return ret;
7397         }
7398
7399         if (ret > 0 && path.slots[0])
7400                 path.slots[0]--;
7401         ret = 0;
7402
7403         while (1) {
7404                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7405                         ret = btrfs_next_leaf(root, &path);
7406                         if (ret < 0) {
7407                                 fprintf(stderr, "Error going to next leaf "
7408                                         "%d\n", ret);
7409                                 break;
7410                         }
7411                         if (ret)
7412                                 break;
7413                 }
7414                 leaf = path.nodes[0];
7415
7416                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7417                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7418                         path.slots[0]++;
7419                         continue;
7420                 }
7421
7422                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7423                               csum_size) * root->sectorsize;
7424                 if (!check_data_csum)
7425                         goto skip_csum_check;
7426                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7427                 ret = check_extent_csums(root, key.offset, data_len,
7428                                          leaf_offset, leaf);
7429                 if (ret)
7430                         break;
7431 skip_csum_check:
7432                 if (!num_bytes) {
7433                         offset = key.offset;
7434                 } else if (key.offset != offset + num_bytes) {
7435                         ret = check_extent_exists(root, offset, num_bytes);
7436                         if (ret) {
7437                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7438                                         "there is no extent record\n",
7439                                         offset, offset+num_bytes);
7440                                 errors++;
7441                         }
7442                         offset = key.offset;
7443                         num_bytes = 0;
7444                 }
7445                 num_bytes += data_len;
7446                 path.slots[0]++;
7447         }
7448
7449         btrfs_release_path(&path);
7450         return errors;
7451 }
7452
7453 static int is_dropped_key(struct btrfs_key *key,
7454                           struct btrfs_key *drop_key) {
7455         if (key->objectid < drop_key->objectid)
7456                 return 1;
7457         else if (key->objectid == drop_key->objectid) {
7458                 if (key->type < drop_key->type)
7459                         return 1;
7460                 else if (key->type == drop_key->type) {
7461                         if (key->offset < drop_key->offset)
7462                                 return 1;
7463                 }
7464         }
7465         return 0;
7466 }
7467
7468 /*
7469  * Here are the rules for FULL_BACKREF.
7470  *
7471  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7472  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7473  *      FULL_BACKREF set.
7474  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7475  *    if it happened after the relocation occurred since we'll have dropped the
7476  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7477  *    have no real way to know for sure.
7478  *
7479  * We process the blocks one root at a time, and we start from the lowest root
7480  * objectid and go to the highest.  So we can just lookup the owner backref for
7481  * the record and if we don't find it then we know it doesn't exist and we have
7482  * a FULL BACKREF.
7483  *
7484  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7485  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7486  * be set or not and then we can check later once we've gathered all the refs.
7487  */
7488 static int calc_extent_flag(struct cache_tree *extent_cache,
7489                            struct extent_buffer *buf,
7490                            struct root_item_record *ri,
7491                            u64 *flags)
7492 {
7493         struct extent_record *rec;
7494         struct cache_extent *cache;
7495         struct tree_backref *tback;
7496         u64 owner = 0;
7497
7498         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7499         /* we have added this extent before */
7500         if (!cache)
7501                 return -ENOENT;
7502
7503         rec = container_of(cache, struct extent_record, cache);
7504
7505         /*
7506          * Except file/reloc tree, we can not have
7507          * FULL BACKREF MODE
7508          */
7509         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7510                 goto normal;
7511         /*
7512          * root node
7513          */
7514         if (buf->start == ri->bytenr)
7515                 goto normal;
7516
7517         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7518                 goto full_backref;
7519
7520         owner = btrfs_header_owner(buf);
7521         if (owner == ri->objectid)
7522                 goto normal;
7523
7524         tback = find_tree_backref(rec, 0, owner);
7525         if (!tback)
7526                 goto full_backref;
7527 normal:
7528         *flags = 0;
7529         if (rec->flag_block_full_backref != FLAG_UNSET &&
7530             rec->flag_block_full_backref != 0)
7531                 rec->bad_full_backref = 1;
7532         return 0;
7533 full_backref:
7534         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7535         if (rec->flag_block_full_backref != FLAG_UNSET &&
7536             rec->flag_block_full_backref != 1)
7537                 rec->bad_full_backref = 1;
7538         return 0;
7539 }
7540
7541 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7542 {
7543         fprintf(stderr, "Invalid key type(");
7544         print_key_type(stderr, 0, key_type);
7545         fprintf(stderr, ") found in root(");
7546         print_objectid(stderr, rootid, 0);
7547         fprintf(stderr, ")\n");
7548 }
7549
7550 /*
7551  * Check if the key is valid with its extent buffer.
7552  *
7553  * This is a early check in case invalid key exists in a extent buffer
7554  * This is not comprehensive yet, but should prevent wrong key/item passed
7555  * further
7556  */
7557 static int check_type_with_root(u64 rootid, u8 key_type)
7558 {
7559         switch (key_type) {
7560         /* Only valid in chunk tree */
7561         case BTRFS_DEV_ITEM_KEY:
7562         case BTRFS_CHUNK_ITEM_KEY:
7563                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7564                         goto err;
7565                 break;
7566         /* valid in csum and log tree */
7567         case BTRFS_CSUM_TREE_OBJECTID:
7568                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7569                       is_fstree(rootid)))
7570                         goto err;
7571                 break;
7572         case BTRFS_EXTENT_ITEM_KEY:
7573         case BTRFS_METADATA_ITEM_KEY:
7574         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7575                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7576                         goto err;
7577                 break;
7578         case BTRFS_ROOT_ITEM_KEY:
7579                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7580                         goto err;
7581                 break;
7582         case BTRFS_DEV_EXTENT_KEY:
7583                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7584                         goto err;
7585                 break;
7586         }
7587         return 0;
7588 err:
7589         report_mismatch_key_root(key_type, rootid);
7590         return -EINVAL;
7591 }
7592
7593 static int run_next_block(struct btrfs_root *root,
7594                           struct block_info *bits,
7595                           int bits_nr,
7596                           u64 *last,
7597                           struct cache_tree *pending,
7598                           struct cache_tree *seen,
7599                           struct cache_tree *reada,
7600                           struct cache_tree *nodes,
7601                           struct cache_tree *extent_cache,
7602                           struct cache_tree *chunk_cache,
7603                           struct rb_root *dev_cache,
7604                           struct block_group_tree *block_group_cache,
7605                           struct device_extent_tree *dev_extent_cache,
7606                           struct root_item_record *ri)
7607 {
7608         struct extent_buffer *buf;
7609         struct extent_record *rec = NULL;
7610         u64 bytenr;
7611         u32 size;
7612         u64 parent;
7613         u64 owner;
7614         u64 flags;
7615         u64 ptr;
7616         u64 gen = 0;
7617         int ret = 0;
7618         int i;
7619         int nritems;
7620         struct btrfs_key key;
7621         struct cache_extent *cache;
7622         int reada_bits;
7623
7624         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7625                                     bits_nr, &reada_bits);
7626         if (nritems == 0)
7627                 return 1;
7628
7629         if (!reada_bits) {
7630                 for(i = 0; i < nritems; i++) {
7631                         ret = add_cache_extent(reada, bits[i].start,
7632                                                bits[i].size);
7633                         if (ret == -EEXIST)
7634                                 continue;
7635
7636                         /* fixme, get the parent transid */
7637                         readahead_tree_block(root, bits[i].start,
7638                                              bits[i].size, 0);
7639                 }
7640         }
7641         *last = bits[0].start;
7642         bytenr = bits[0].start;
7643         size = bits[0].size;
7644
7645         cache = lookup_cache_extent(pending, bytenr, size);
7646         if (cache) {
7647                 remove_cache_extent(pending, cache);
7648                 free(cache);
7649         }
7650         cache = lookup_cache_extent(reada, bytenr, size);
7651         if (cache) {
7652                 remove_cache_extent(reada, cache);
7653                 free(cache);
7654         }
7655         cache = lookup_cache_extent(nodes, bytenr, size);
7656         if (cache) {
7657                 remove_cache_extent(nodes, cache);
7658                 free(cache);
7659         }
7660         cache = lookup_cache_extent(extent_cache, bytenr, size);
7661         if (cache) {
7662                 rec = container_of(cache, struct extent_record, cache);
7663                 gen = rec->parent_generation;
7664         }
7665
7666         /* fixme, get the real parent transid */
7667         buf = read_tree_block(root, bytenr, size, gen);
7668         if (!extent_buffer_uptodate(buf)) {
7669                 record_bad_block_io(root->fs_info,
7670                                     extent_cache, bytenr, size);
7671                 goto out;
7672         }
7673
7674         nritems = btrfs_header_nritems(buf);
7675
7676         flags = 0;
7677         if (!init_extent_tree) {
7678                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7679                                        btrfs_header_level(buf), 1, NULL,
7680                                        &flags);
7681                 if (ret < 0) {
7682                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7683                         if (ret < 0) {
7684                                 fprintf(stderr, "Couldn't calc extent flags\n");
7685                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7686                         }
7687                 }
7688         } else {
7689                 flags = 0;
7690                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7691                 if (ret < 0) {
7692                         fprintf(stderr, "Couldn't calc extent flags\n");
7693                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7694                 }
7695         }
7696
7697         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7698                 if (ri != NULL &&
7699                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7700                     ri->objectid == btrfs_header_owner(buf)) {
7701                         /*
7702                          * Ok we got to this block from it's original owner and
7703                          * we have FULL_BACKREF set.  Relocation can leave
7704                          * converted blocks over so this is altogether possible,
7705                          * however it's not possible if the generation > the
7706                          * last snapshot, so check for this case.
7707                          */
7708                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7709                             btrfs_header_generation(buf) > ri->last_snapshot) {
7710                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7711                                 rec->bad_full_backref = 1;
7712                         }
7713                 }
7714         } else {
7715                 if (ri != NULL &&
7716                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7717                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7718                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7719                         rec->bad_full_backref = 1;
7720                 }
7721         }
7722
7723         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7724                 rec->flag_block_full_backref = 1;
7725                 parent = bytenr;
7726                 owner = 0;
7727         } else {
7728                 rec->flag_block_full_backref = 0;
7729                 parent = 0;
7730                 owner = btrfs_header_owner(buf);
7731         }
7732
7733         ret = check_block(root, extent_cache, buf, flags);
7734         if (ret)
7735                 goto out;
7736
7737         if (btrfs_is_leaf(buf)) {
7738                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7739                 for (i = 0; i < nritems; i++) {
7740                         struct btrfs_file_extent_item *fi;
7741                         btrfs_item_key_to_cpu(buf, &key, i);
7742                         /*
7743                          * Check key type against the leaf owner.
7744                          * Could filter quite a lot of early error if
7745                          * owner is correct
7746                          */
7747                         if (check_type_with_root(btrfs_header_owner(buf),
7748                                                  key.type)) {
7749                                 fprintf(stderr, "ignoring invalid key\n");
7750                                 continue;
7751                         }
7752                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7753                                 process_extent_item(root, extent_cache, buf,
7754                                                     i);
7755                                 continue;
7756                         }
7757                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7758                                 process_extent_item(root, extent_cache, buf,
7759                                                     i);
7760                                 continue;
7761                         }
7762                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7763                                 total_csum_bytes +=
7764                                         btrfs_item_size_nr(buf, i);
7765                                 continue;
7766                         }
7767                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7768                                 process_chunk_item(chunk_cache, &key, buf, i);
7769                                 continue;
7770                         }
7771                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7772                                 process_device_item(dev_cache, &key, buf, i);
7773                                 continue;
7774                         }
7775                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7776                                 process_block_group_item(block_group_cache,
7777                                         &key, buf, i);
7778                                 continue;
7779                         }
7780                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7781                                 process_device_extent_item(dev_extent_cache,
7782                                         &key, buf, i);
7783                                 continue;
7784
7785                         }
7786                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7787 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7788                                 process_extent_ref_v0(extent_cache, buf, i);
7789 #else
7790                                 BUG();
7791 #endif
7792                                 continue;
7793                         }
7794
7795                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7796                                 ret = add_tree_backref(extent_cache,
7797                                                 key.objectid, 0, key.offset, 0);
7798                                 if (ret < 0)
7799                                         error(
7800                                 "add_tree_backref failed (leaf tree block): %s",
7801                                               strerror(-ret));
7802                                 continue;
7803                         }
7804                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7805                                 ret = add_tree_backref(extent_cache,
7806                                                 key.objectid, key.offset, 0, 0);
7807                                 if (ret < 0)
7808                                         error(
7809                                 "add_tree_backref failed (leaf shared block): %s",
7810                                               strerror(-ret));
7811                                 continue;
7812                         }
7813                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7814                                 struct btrfs_extent_data_ref *ref;
7815                                 ref = btrfs_item_ptr(buf, i,
7816                                                 struct btrfs_extent_data_ref);
7817                                 add_data_backref(extent_cache,
7818                                         key.objectid, 0,
7819                                         btrfs_extent_data_ref_root(buf, ref),
7820                                         btrfs_extent_data_ref_objectid(buf,
7821                                                                        ref),
7822                                         btrfs_extent_data_ref_offset(buf, ref),
7823                                         btrfs_extent_data_ref_count(buf, ref),
7824                                         0, root->sectorsize);
7825                                 continue;
7826                         }
7827                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7828                                 struct btrfs_shared_data_ref *ref;
7829                                 ref = btrfs_item_ptr(buf, i,
7830                                                 struct btrfs_shared_data_ref);
7831                                 add_data_backref(extent_cache,
7832                                         key.objectid, key.offset, 0, 0, 0,
7833                                         btrfs_shared_data_ref_count(buf, ref),
7834                                         0, root->sectorsize);
7835                                 continue;
7836                         }
7837                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7838                                 struct bad_item *bad;
7839
7840                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7841                                         continue;
7842                                 if (!owner)
7843                                         continue;
7844                                 bad = malloc(sizeof(struct bad_item));
7845                                 if (!bad)
7846                                         continue;
7847                                 INIT_LIST_HEAD(&bad->list);
7848                                 memcpy(&bad->key, &key,
7849                                        sizeof(struct btrfs_key));
7850                                 bad->root_id = owner;
7851                                 list_add_tail(&bad->list, &delete_items);
7852                                 continue;
7853                         }
7854                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7855                                 continue;
7856                         fi = btrfs_item_ptr(buf, i,
7857                                             struct btrfs_file_extent_item);
7858                         if (btrfs_file_extent_type(buf, fi) ==
7859                             BTRFS_FILE_EXTENT_INLINE)
7860                                 continue;
7861                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7862                                 continue;
7863
7864                         data_bytes_allocated +=
7865                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7866                         if (data_bytes_allocated < root->sectorsize) {
7867                                 abort();
7868                         }
7869                         data_bytes_referenced +=
7870                                 btrfs_file_extent_num_bytes(buf, fi);
7871                         add_data_backref(extent_cache,
7872                                 btrfs_file_extent_disk_bytenr(buf, fi),
7873                                 parent, owner, key.objectid, key.offset -
7874                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7875                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7876                 }
7877         } else {
7878                 int level;
7879                 struct btrfs_key first_key;
7880
7881                 first_key.objectid = 0;
7882
7883                 if (nritems > 0)
7884                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7885                 level = btrfs_header_level(buf);
7886                 for (i = 0; i < nritems; i++) {
7887                         struct extent_record tmpl;
7888
7889                         ptr = btrfs_node_blockptr(buf, i);
7890                         size = root->nodesize;
7891                         btrfs_node_key_to_cpu(buf, &key, i);
7892                         if (ri != NULL) {
7893                                 if ((level == ri->drop_level)
7894                                     && is_dropped_key(&key, &ri->drop_key)) {
7895                                         continue;
7896                                 }
7897                         }
7898
7899                         memset(&tmpl, 0, sizeof(tmpl));
7900                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7901                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7902                         tmpl.start = ptr;
7903                         tmpl.nr = size;
7904                         tmpl.refs = 1;
7905                         tmpl.metadata = 1;
7906                         tmpl.max_size = size;
7907                         ret = add_extent_rec(extent_cache, &tmpl);
7908                         if (ret < 0)
7909                                 goto out;
7910
7911                         ret = add_tree_backref(extent_cache, ptr, parent,
7912                                         owner, 1);
7913                         if (ret < 0) {
7914                                 error(
7915                                 "add_tree_backref failed (non-leaf block): %s",
7916                                       strerror(-ret));
7917                                 continue;
7918                         }
7919
7920                         if (level > 1) {
7921                                 add_pending(nodes, seen, ptr, size);
7922                         } else {
7923                                 add_pending(pending, seen, ptr, size);
7924                         }
7925                 }
7926                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7927                                       nritems) * sizeof(struct btrfs_key_ptr);
7928         }
7929         total_btree_bytes += buf->len;
7930         if (fs_root_objectid(btrfs_header_owner(buf)))
7931                 total_fs_tree_bytes += buf->len;
7932         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7933                 total_extent_tree_bytes += buf->len;
7934         if (!found_old_backref &&
7935             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7936             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7937             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7938                 found_old_backref = 1;
7939 out:
7940         free_extent_buffer(buf);
7941         return ret;
7942 }
7943
7944 static int add_root_to_pending(struct extent_buffer *buf,
7945                                struct cache_tree *extent_cache,
7946                                struct cache_tree *pending,
7947                                struct cache_tree *seen,
7948                                struct cache_tree *nodes,
7949                                u64 objectid)
7950 {
7951         struct extent_record tmpl;
7952         int ret;
7953
7954         if (btrfs_header_level(buf) > 0)
7955                 add_pending(nodes, seen, buf->start, buf->len);
7956         else
7957                 add_pending(pending, seen, buf->start, buf->len);
7958
7959         memset(&tmpl, 0, sizeof(tmpl));
7960         tmpl.start = buf->start;
7961         tmpl.nr = buf->len;
7962         tmpl.is_root = 1;
7963         tmpl.refs = 1;
7964         tmpl.metadata = 1;
7965         tmpl.max_size = buf->len;
7966         add_extent_rec(extent_cache, &tmpl);
7967
7968         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7969             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7970                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7971                                 0, 1);
7972         else
7973                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7974                                 1);
7975         return ret;
7976 }
7977
7978 /* as we fix the tree, we might be deleting blocks that
7979  * we're tracking for repair.  This hook makes sure we
7980  * remove any backrefs for blocks as we are fixing them.
7981  */
7982 static int free_extent_hook(struct btrfs_trans_handle *trans,
7983                             struct btrfs_root *root,
7984                             u64 bytenr, u64 num_bytes, u64 parent,
7985                             u64 root_objectid, u64 owner, u64 offset,
7986                             int refs_to_drop)
7987 {
7988         struct extent_record *rec;
7989         struct cache_extent *cache;
7990         int is_data;
7991         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7992
7993         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7994         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7995         if (!cache)
7996                 return 0;
7997
7998         rec = container_of(cache, struct extent_record, cache);
7999         if (is_data) {
8000                 struct data_backref *back;
8001                 back = find_data_backref(rec, parent, root_objectid, owner,
8002                                          offset, 1, bytenr, num_bytes);
8003                 if (!back)
8004                         goto out;
8005                 if (back->node.found_ref) {
8006                         back->found_ref -= refs_to_drop;
8007                         if (rec->refs)
8008                                 rec->refs -= refs_to_drop;
8009                 }
8010                 if (back->node.found_extent_tree) {
8011                         back->num_refs -= refs_to_drop;
8012                         if (rec->extent_item_refs)
8013                                 rec->extent_item_refs -= refs_to_drop;
8014                 }
8015                 if (back->found_ref == 0)
8016                         back->node.found_ref = 0;
8017                 if (back->num_refs == 0)
8018                         back->node.found_extent_tree = 0;
8019
8020                 if (!back->node.found_extent_tree && back->node.found_ref) {
8021                         list_del(&back->node.list);
8022                         free(back);
8023                 }
8024         } else {
8025                 struct tree_backref *back;
8026                 back = find_tree_backref(rec, parent, root_objectid);
8027                 if (!back)
8028                         goto out;
8029                 if (back->node.found_ref) {
8030                         if (rec->refs)
8031                                 rec->refs--;
8032                         back->node.found_ref = 0;
8033                 }
8034                 if (back->node.found_extent_tree) {
8035                         if (rec->extent_item_refs)
8036                                 rec->extent_item_refs--;
8037                         back->node.found_extent_tree = 0;
8038                 }
8039                 if (!back->node.found_extent_tree && back->node.found_ref) {
8040                         list_del(&back->node.list);
8041                         free(back);
8042                 }
8043         }
8044         maybe_free_extent_rec(extent_cache, rec);
8045 out:
8046         return 0;
8047 }
8048
8049 static int delete_extent_records(struct btrfs_trans_handle *trans,
8050                                  struct btrfs_root *root,
8051                                  struct btrfs_path *path,
8052                                  u64 bytenr)
8053 {
8054         struct btrfs_key key;
8055         struct btrfs_key found_key;
8056         struct extent_buffer *leaf;
8057         int ret;
8058         int slot;
8059
8060
8061         key.objectid = bytenr;
8062         key.type = (u8)-1;
8063         key.offset = (u64)-1;
8064
8065         while(1) {
8066                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8067                                         &key, path, 0, 1);
8068                 if (ret < 0)
8069                         break;
8070
8071                 if (ret > 0) {
8072                         ret = 0;
8073                         if (path->slots[0] == 0)
8074                                 break;
8075                         path->slots[0]--;
8076                 }
8077                 ret = 0;
8078
8079                 leaf = path->nodes[0];
8080                 slot = path->slots[0];
8081
8082                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8083                 if (found_key.objectid != bytenr)
8084                         break;
8085
8086                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8087                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8088                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8089                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8090                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8091                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8092                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8093                         btrfs_release_path(path);
8094                         if (found_key.type == 0) {
8095                                 if (found_key.offset == 0)
8096                                         break;
8097                                 key.offset = found_key.offset - 1;
8098                                 key.type = found_key.type;
8099                         }
8100                         key.type = found_key.type - 1;
8101                         key.offset = (u64)-1;
8102                         continue;
8103                 }
8104
8105                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8106                         found_key.objectid, found_key.type, found_key.offset);
8107
8108                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8109                 if (ret)
8110                         break;
8111                 btrfs_release_path(path);
8112
8113                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8114                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8115                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8116                                 found_key.offset : root->nodesize;
8117
8118                         ret = btrfs_update_block_group(trans, root, bytenr,
8119                                                        bytes, 0, 0);
8120                         if (ret)
8121                                 break;
8122                 }
8123         }
8124
8125         btrfs_release_path(path);
8126         return ret;
8127 }
8128
8129 /*
8130  * for a single backref, this will allocate a new extent
8131  * and add the backref to it.
8132  */
8133 static int record_extent(struct btrfs_trans_handle *trans,
8134                          struct btrfs_fs_info *info,
8135                          struct btrfs_path *path,
8136                          struct extent_record *rec,
8137                          struct extent_backref *back,
8138                          int allocated, u64 flags)
8139 {
8140         int ret = 0;
8141         struct btrfs_root *extent_root = info->extent_root;
8142         struct extent_buffer *leaf;
8143         struct btrfs_key ins_key;
8144         struct btrfs_extent_item *ei;
8145         struct data_backref *dback;
8146         struct btrfs_tree_block_info *bi;
8147
8148         if (!back->is_data)
8149                 rec->max_size = max_t(u64, rec->max_size,
8150                                     info->extent_root->nodesize);
8151
8152         if (!allocated) {
8153                 u32 item_size = sizeof(*ei);
8154
8155                 if (!back->is_data)
8156                         item_size += sizeof(*bi);
8157
8158                 ins_key.objectid = rec->start;
8159                 ins_key.offset = rec->max_size;
8160                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8161
8162                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8163                                         &ins_key, item_size);
8164                 if (ret)
8165                         goto fail;
8166
8167                 leaf = path->nodes[0];
8168                 ei = btrfs_item_ptr(leaf, path->slots[0],
8169                                     struct btrfs_extent_item);
8170
8171                 btrfs_set_extent_refs(leaf, ei, 0);
8172                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8173
8174                 if (back->is_data) {
8175                         btrfs_set_extent_flags(leaf, ei,
8176                                                BTRFS_EXTENT_FLAG_DATA);
8177                 } else {
8178                         struct btrfs_disk_key copy_key;;
8179
8180                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8181                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8182                                              sizeof(*bi));
8183
8184                         btrfs_set_disk_key_objectid(&copy_key,
8185                                                     rec->info_objectid);
8186                         btrfs_set_disk_key_type(&copy_key, 0);
8187                         btrfs_set_disk_key_offset(&copy_key, 0);
8188
8189                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8190                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8191
8192                         btrfs_set_extent_flags(leaf, ei,
8193                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8194                 }
8195
8196                 btrfs_mark_buffer_dirty(leaf);
8197                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8198                                                rec->max_size, 1, 0);
8199                 if (ret)
8200                         goto fail;
8201                 btrfs_release_path(path);
8202         }
8203
8204         if (back->is_data) {
8205                 u64 parent;
8206                 int i;
8207
8208                 dback = to_data_backref(back);
8209                 if (back->full_backref)
8210                         parent = dback->parent;
8211                 else
8212                         parent = 0;
8213
8214                 for (i = 0; i < dback->found_ref; i++) {
8215                         /* if parent != 0, we're doing a full backref
8216                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8217                          * just makes the backref allocator create a data
8218                          * backref
8219                          */
8220                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8221                                                    rec->start, rec->max_size,
8222                                                    parent,
8223                                                    dback->root,
8224                                                    parent ?
8225                                                    BTRFS_FIRST_FREE_OBJECTID :
8226                                                    dback->owner,
8227                                                    dback->offset);
8228                         if (ret)
8229                                 break;
8230                 }
8231                 fprintf(stderr, "adding new data backref"
8232                                 " on %llu %s %llu owner %llu"
8233                                 " offset %llu found %d\n",
8234                                 (unsigned long long)rec->start,
8235                                 back->full_backref ?
8236                                 "parent" : "root",
8237                                 back->full_backref ?
8238                                 (unsigned long long)parent :
8239                                 (unsigned long long)dback->root,
8240                                 (unsigned long long)dback->owner,
8241                                 (unsigned long long)dback->offset,
8242                                 dback->found_ref);
8243         } else {
8244                 u64 parent;
8245                 struct tree_backref *tback;
8246
8247                 tback = to_tree_backref(back);
8248                 if (back->full_backref)
8249                         parent = tback->parent;
8250                 else
8251                         parent = 0;
8252
8253                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8254                                            rec->start, rec->max_size,
8255                                            parent, tback->root, 0, 0);
8256                 fprintf(stderr, "adding new tree backref on "
8257                         "start %llu len %llu parent %llu root %llu\n",
8258                         rec->start, rec->max_size, parent, tback->root);
8259         }
8260 fail:
8261         btrfs_release_path(path);
8262         return ret;
8263 }
8264
8265 static struct extent_entry *find_entry(struct list_head *entries,
8266                                        u64 bytenr, u64 bytes)
8267 {
8268         struct extent_entry *entry = NULL;
8269
8270         list_for_each_entry(entry, entries, list) {
8271                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8272                         return entry;
8273         }
8274
8275         return NULL;
8276 }
8277
8278 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8279 {
8280         struct extent_entry *entry, *best = NULL, *prev = NULL;
8281
8282         list_for_each_entry(entry, entries, list) {
8283                 /*
8284                  * If there are as many broken entries as entries then we know
8285                  * not to trust this particular entry.
8286                  */
8287                 if (entry->broken == entry->count)
8288                         continue;
8289
8290                 /*
8291                  * Special case, when there are only two entries and 'best' is
8292                  * the first one
8293                  */
8294                 if (!prev) {
8295                         best = entry;
8296                         prev = entry;
8297                         continue;
8298                 }
8299
8300                 /*
8301                  * If our current entry == best then we can't be sure our best
8302                  * is really the best, so we need to keep searching.
8303                  */
8304                 if (best && best->count == entry->count) {
8305                         prev = entry;
8306                         best = NULL;
8307                         continue;
8308                 }
8309
8310                 /* Prev == entry, not good enough, have to keep searching */
8311                 if (!prev->broken && prev->count == entry->count)
8312                         continue;
8313
8314                 if (!best)
8315                         best = (prev->count > entry->count) ? prev : entry;
8316                 else if (best->count < entry->count)
8317                         best = entry;
8318                 prev = entry;
8319         }
8320
8321         return best;
8322 }
8323
8324 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8325                       struct data_backref *dback, struct extent_entry *entry)
8326 {
8327         struct btrfs_trans_handle *trans;
8328         struct btrfs_root *root;
8329         struct btrfs_file_extent_item *fi;
8330         struct extent_buffer *leaf;
8331         struct btrfs_key key;
8332         u64 bytenr, bytes;
8333         int ret, err;
8334
8335         key.objectid = dback->root;
8336         key.type = BTRFS_ROOT_ITEM_KEY;
8337         key.offset = (u64)-1;
8338         root = btrfs_read_fs_root(info, &key);
8339         if (IS_ERR(root)) {
8340                 fprintf(stderr, "Couldn't find root for our ref\n");
8341                 return -EINVAL;
8342         }
8343
8344         /*
8345          * The backref points to the original offset of the extent if it was
8346          * split, so we need to search down to the offset we have and then walk
8347          * forward until we find the backref we're looking for.
8348          */
8349         key.objectid = dback->owner;
8350         key.type = BTRFS_EXTENT_DATA_KEY;
8351         key.offset = dback->offset;
8352         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8353         if (ret < 0) {
8354                 fprintf(stderr, "Error looking up ref %d\n", ret);
8355                 return ret;
8356         }
8357
8358         while (1) {
8359                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8360                         ret = btrfs_next_leaf(root, path);
8361                         if (ret) {
8362                                 fprintf(stderr, "Couldn't find our ref, next\n");
8363                                 return -EINVAL;
8364                         }
8365                 }
8366                 leaf = path->nodes[0];
8367                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8368                 if (key.objectid != dback->owner ||
8369                     key.type != BTRFS_EXTENT_DATA_KEY) {
8370                         fprintf(stderr, "Couldn't find our ref, search\n");
8371                         return -EINVAL;
8372                 }
8373                 fi = btrfs_item_ptr(leaf, path->slots[0],
8374                                     struct btrfs_file_extent_item);
8375                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8376                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8377
8378                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8379                         break;
8380                 path->slots[0]++;
8381         }
8382
8383         btrfs_release_path(path);
8384
8385         trans = btrfs_start_transaction(root, 1);
8386         if (IS_ERR(trans))
8387                 return PTR_ERR(trans);
8388
8389         /*
8390          * Ok we have the key of the file extent we want to fix, now we can cow
8391          * down to the thing and fix it.
8392          */
8393         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8394         if (ret < 0) {
8395                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8396                         key.objectid, key.type, key.offset, ret);
8397                 goto out;
8398         }
8399         if (ret > 0) {
8400                 fprintf(stderr, "Well that's odd, we just found this key "
8401                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8402                         key.offset);
8403                 ret = -EINVAL;
8404                 goto out;
8405         }
8406         leaf = path->nodes[0];
8407         fi = btrfs_item_ptr(leaf, path->slots[0],
8408                             struct btrfs_file_extent_item);
8409
8410         if (btrfs_file_extent_compression(leaf, fi) &&
8411             dback->disk_bytenr != entry->bytenr) {
8412                 fprintf(stderr, "Ref doesn't match the record start and is "
8413                         "compressed, please take a btrfs-image of this file "
8414                         "system and send it to a btrfs developer so they can "
8415                         "complete this functionality for bytenr %Lu\n",
8416                         dback->disk_bytenr);
8417                 ret = -EINVAL;
8418                 goto out;
8419         }
8420
8421         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8422                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8423         } else if (dback->disk_bytenr > entry->bytenr) {
8424                 u64 off_diff, offset;
8425
8426                 off_diff = dback->disk_bytenr - entry->bytenr;
8427                 offset = btrfs_file_extent_offset(leaf, fi);
8428                 if (dback->disk_bytenr + offset +
8429                     btrfs_file_extent_num_bytes(leaf, fi) >
8430                     entry->bytenr + entry->bytes) {
8431                         fprintf(stderr, "Ref is past the entry end, please "
8432                                 "take a btrfs-image of this file system and "
8433                                 "send it to a btrfs developer, ref %Lu\n",
8434                                 dback->disk_bytenr);
8435                         ret = -EINVAL;
8436                         goto out;
8437                 }
8438                 offset += off_diff;
8439                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8440                 btrfs_set_file_extent_offset(leaf, fi, offset);
8441         } else if (dback->disk_bytenr < entry->bytenr) {
8442                 u64 offset;
8443
8444                 offset = btrfs_file_extent_offset(leaf, fi);
8445                 if (dback->disk_bytenr + offset < entry->bytenr) {
8446                         fprintf(stderr, "Ref is before the entry start, please"
8447                                 " take a btrfs-image of this file system and "
8448                                 "send it to a btrfs developer, ref %Lu\n",
8449                                 dback->disk_bytenr);
8450                         ret = -EINVAL;
8451                         goto out;
8452                 }
8453
8454                 offset += dback->disk_bytenr;
8455                 offset -= entry->bytenr;
8456                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8457                 btrfs_set_file_extent_offset(leaf, fi, offset);
8458         }
8459
8460         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8461
8462         /*
8463          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8464          * only do this if we aren't using compression, otherwise it's a
8465          * trickier case.
8466          */
8467         if (!btrfs_file_extent_compression(leaf, fi))
8468                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8469         else
8470                 printf("ram bytes may be wrong?\n");
8471         btrfs_mark_buffer_dirty(leaf);
8472 out:
8473         err = btrfs_commit_transaction(trans, root);
8474         btrfs_release_path(path);
8475         return ret ? ret : err;
8476 }
8477
8478 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8479                            struct extent_record *rec)
8480 {
8481         struct extent_backref *back;
8482         struct data_backref *dback;
8483         struct extent_entry *entry, *best = NULL;
8484         LIST_HEAD(entries);
8485         int nr_entries = 0;
8486         int broken_entries = 0;
8487         int ret = 0;
8488         short mismatch = 0;
8489
8490         /*
8491          * Metadata is easy and the backrefs should always agree on bytenr and
8492          * size, if not we've got bigger issues.
8493          */
8494         if (rec->metadata)
8495                 return 0;
8496
8497         list_for_each_entry(back, &rec->backrefs, list) {
8498                 if (back->full_backref || !back->is_data)
8499                         continue;
8500
8501                 dback = to_data_backref(back);
8502
8503                 /*
8504                  * We only pay attention to backrefs that we found a real
8505                  * backref for.
8506                  */
8507                 if (dback->found_ref == 0)
8508                         continue;
8509
8510                 /*
8511                  * For now we only catch when the bytes don't match, not the
8512                  * bytenr.  We can easily do this at the same time, but I want
8513                  * to have a fs image to test on before we just add repair
8514                  * functionality willy-nilly so we know we won't screw up the
8515                  * repair.
8516                  */
8517
8518                 entry = find_entry(&entries, dback->disk_bytenr,
8519                                    dback->bytes);
8520                 if (!entry) {
8521                         entry = malloc(sizeof(struct extent_entry));
8522                         if (!entry) {
8523                                 ret = -ENOMEM;
8524                                 goto out;
8525                         }
8526                         memset(entry, 0, sizeof(*entry));
8527                         entry->bytenr = dback->disk_bytenr;
8528                         entry->bytes = dback->bytes;
8529                         list_add_tail(&entry->list, &entries);
8530                         nr_entries++;
8531                 }
8532
8533                 /*
8534                  * If we only have on entry we may think the entries agree when
8535                  * in reality they don't so we have to do some extra checking.
8536                  */
8537                 if (dback->disk_bytenr != rec->start ||
8538                     dback->bytes != rec->nr || back->broken)
8539                         mismatch = 1;
8540
8541                 if (back->broken) {
8542                         entry->broken++;
8543                         broken_entries++;
8544                 }
8545
8546                 entry->count++;
8547         }
8548
8549         /* Yay all the backrefs agree, carry on good sir */
8550         if (nr_entries <= 1 && !mismatch)
8551                 goto out;
8552
8553         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8554                 "%Lu\n", rec->start);
8555
8556         /*
8557          * First we want to see if the backrefs can agree amongst themselves who
8558          * is right, so figure out which one of the entries has the highest
8559          * count.
8560          */
8561         best = find_most_right_entry(&entries);
8562
8563         /*
8564          * Ok so we may have an even split between what the backrefs think, so
8565          * this is where we use the extent ref to see what it thinks.
8566          */
8567         if (!best) {
8568                 entry = find_entry(&entries, rec->start, rec->nr);
8569                 if (!entry && (!broken_entries || !rec->found_rec)) {
8570                         fprintf(stderr, "Backrefs don't agree with each other "
8571                                 "and extent record doesn't agree with anybody,"
8572                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8573                                 rec->start, rec->nr);
8574                         ret = -EINVAL;
8575                         goto out;
8576                 } else if (!entry) {
8577                         /*
8578                          * Ok our backrefs were broken, we'll assume this is the
8579                          * correct value and add an entry for this range.
8580                          */
8581                         entry = malloc(sizeof(struct extent_entry));
8582                         if (!entry) {
8583                                 ret = -ENOMEM;
8584                                 goto out;
8585                         }
8586                         memset(entry, 0, sizeof(*entry));
8587                         entry->bytenr = rec->start;
8588                         entry->bytes = rec->nr;
8589                         list_add_tail(&entry->list, &entries);
8590                         nr_entries++;
8591                 }
8592                 entry->count++;
8593                 best = find_most_right_entry(&entries);
8594                 if (!best) {
8595                         fprintf(stderr, "Backrefs and extent record evenly "
8596                                 "split on who is right, this is going to "
8597                                 "require user input to fix bytenr %Lu bytes "
8598                                 "%Lu\n", rec->start, rec->nr);
8599                         ret = -EINVAL;
8600                         goto out;
8601                 }
8602         }
8603
8604         /*
8605          * I don't think this can happen currently as we'll abort() if we catch
8606          * this case higher up, but in case somebody removes that we still can't
8607          * deal with it properly here yet, so just bail out of that's the case.
8608          */
8609         if (best->bytenr != rec->start) {
8610                 fprintf(stderr, "Extent start and backref starts don't match, "
8611                         "please use btrfs-image on this file system and send "
8612                         "it to a btrfs developer so they can make fsck fix "
8613                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8614                         rec->start, rec->nr);
8615                 ret = -EINVAL;
8616                 goto out;
8617         }
8618
8619         /*
8620          * Ok great we all agreed on an extent record, let's go find the real
8621          * references and fix up the ones that don't match.
8622          */
8623         list_for_each_entry(back, &rec->backrefs, list) {
8624                 if (back->full_backref || !back->is_data)
8625                         continue;
8626
8627                 dback = to_data_backref(back);
8628
8629                 /*
8630                  * Still ignoring backrefs that don't have a real ref attached
8631                  * to them.
8632                  */
8633                 if (dback->found_ref == 0)
8634                         continue;
8635
8636                 if (dback->bytes == best->bytes &&
8637                     dback->disk_bytenr == best->bytenr)
8638                         continue;
8639
8640                 ret = repair_ref(info, path, dback, best);
8641                 if (ret)
8642                         goto out;
8643         }
8644
8645         /*
8646          * Ok we messed with the actual refs, which means we need to drop our
8647          * entire cache and go back and rescan.  I know this is a huge pain and
8648          * adds a lot of extra work, but it's the only way to be safe.  Once all
8649          * the backrefs agree we may not need to do anything to the extent
8650          * record itself.
8651          */
8652         ret = -EAGAIN;
8653 out:
8654         while (!list_empty(&entries)) {
8655                 entry = list_entry(entries.next, struct extent_entry, list);
8656                 list_del_init(&entry->list);
8657                 free(entry);
8658         }
8659         return ret;
8660 }
8661
8662 static int process_duplicates(struct cache_tree *extent_cache,
8663                               struct extent_record *rec)
8664 {
8665         struct extent_record *good, *tmp;
8666         struct cache_extent *cache;
8667         int ret;
8668
8669         /*
8670          * If we found a extent record for this extent then return, or if we
8671          * have more than one duplicate we are likely going to need to delete
8672          * something.
8673          */
8674         if (rec->found_rec || rec->num_duplicates > 1)
8675                 return 0;
8676
8677         /* Shouldn't happen but just in case */
8678         BUG_ON(!rec->num_duplicates);
8679
8680         /*
8681          * So this happens if we end up with a backref that doesn't match the
8682          * actual extent entry.  So either the backref is bad or the extent
8683          * entry is bad.  Either way we want to have the extent_record actually
8684          * reflect what we found in the extent_tree, so we need to take the
8685          * duplicate out and use that as the extent_record since the only way we
8686          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8687          */
8688         remove_cache_extent(extent_cache, &rec->cache);
8689
8690         good = to_extent_record(rec->dups.next);
8691         list_del_init(&good->list);
8692         INIT_LIST_HEAD(&good->backrefs);
8693         INIT_LIST_HEAD(&good->dups);
8694         good->cache.start = good->start;
8695         good->cache.size = good->nr;
8696         good->content_checked = 0;
8697         good->owner_ref_checked = 0;
8698         good->num_duplicates = 0;
8699         good->refs = rec->refs;
8700         list_splice_init(&rec->backrefs, &good->backrefs);
8701         while (1) {
8702                 cache = lookup_cache_extent(extent_cache, good->start,
8703                                             good->nr);
8704                 if (!cache)
8705                         break;
8706                 tmp = container_of(cache, struct extent_record, cache);
8707
8708                 /*
8709                  * If we find another overlapping extent and it's found_rec is
8710                  * set then it's a duplicate and we need to try and delete
8711                  * something.
8712                  */
8713                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8714                         if (list_empty(&good->list))
8715                                 list_add_tail(&good->list,
8716                                               &duplicate_extents);
8717                         good->num_duplicates += tmp->num_duplicates + 1;
8718                         list_splice_init(&tmp->dups, &good->dups);
8719                         list_del_init(&tmp->list);
8720                         list_add_tail(&tmp->list, &good->dups);
8721                         remove_cache_extent(extent_cache, &tmp->cache);
8722                         continue;
8723                 }
8724
8725                 /*
8726                  * Ok we have another non extent item backed extent rec, so lets
8727                  * just add it to this extent and carry on like we did above.
8728                  */
8729                 good->refs += tmp->refs;
8730                 list_splice_init(&tmp->backrefs, &good->backrefs);
8731                 remove_cache_extent(extent_cache, &tmp->cache);
8732                 free(tmp);
8733         }
8734         ret = insert_cache_extent(extent_cache, &good->cache);
8735         BUG_ON(ret);
8736         free(rec);
8737         return good->num_duplicates ? 0 : 1;
8738 }
8739
8740 static int delete_duplicate_records(struct btrfs_root *root,
8741                                     struct extent_record *rec)
8742 {
8743         struct btrfs_trans_handle *trans;
8744         LIST_HEAD(delete_list);
8745         struct btrfs_path path;
8746         struct extent_record *tmp, *good, *n;
8747         int nr_del = 0;
8748         int ret = 0, err;
8749         struct btrfs_key key;
8750
8751         btrfs_init_path(&path);
8752
8753         good = rec;
8754         /* Find the record that covers all of the duplicates. */
8755         list_for_each_entry(tmp, &rec->dups, list) {
8756                 if (good->start < tmp->start)
8757                         continue;
8758                 if (good->nr > tmp->nr)
8759                         continue;
8760
8761                 if (tmp->start + tmp->nr < good->start + good->nr) {
8762                         fprintf(stderr, "Ok we have overlapping extents that "
8763                                 "aren't completely covered by each other, this "
8764                                 "is going to require more careful thought.  "
8765                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8766                                 tmp->start, tmp->nr, good->start, good->nr);
8767                         abort();
8768                 }
8769                 good = tmp;
8770         }
8771
8772         if (good != rec)
8773                 list_add_tail(&rec->list, &delete_list);
8774
8775         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8776                 if (tmp == good)
8777                         continue;
8778                 list_move_tail(&tmp->list, &delete_list);
8779         }
8780
8781         root = root->fs_info->extent_root;
8782         trans = btrfs_start_transaction(root, 1);
8783         if (IS_ERR(trans)) {
8784                 ret = PTR_ERR(trans);
8785                 goto out;
8786         }
8787
8788         list_for_each_entry(tmp, &delete_list, list) {
8789                 if (tmp->found_rec == 0)
8790                         continue;
8791                 key.objectid = tmp->start;
8792                 key.type = BTRFS_EXTENT_ITEM_KEY;
8793                 key.offset = tmp->nr;
8794
8795                 /* Shouldn't happen but just in case */
8796                 if (tmp->metadata) {
8797                         fprintf(stderr, "Well this shouldn't happen, extent "
8798                                 "record overlaps but is metadata? "
8799                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8800                         abort();
8801                 }
8802
8803                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8804                 if (ret) {
8805                         if (ret > 0)
8806                                 ret = -EINVAL;
8807                         break;
8808                 }
8809                 ret = btrfs_del_item(trans, root, &path);
8810                 if (ret)
8811                         break;
8812                 btrfs_release_path(&path);
8813                 nr_del++;
8814         }
8815         err = btrfs_commit_transaction(trans, root);
8816         if (err && !ret)
8817                 ret = err;
8818 out:
8819         while (!list_empty(&delete_list)) {
8820                 tmp = to_extent_record(delete_list.next);
8821                 list_del_init(&tmp->list);
8822                 if (tmp == rec)
8823                         continue;
8824                 free(tmp);
8825         }
8826
8827         while (!list_empty(&rec->dups)) {
8828                 tmp = to_extent_record(rec->dups.next);
8829                 list_del_init(&tmp->list);
8830                 free(tmp);
8831         }
8832
8833         btrfs_release_path(&path);
8834
8835         if (!ret && !nr_del)
8836                 rec->num_duplicates = 0;
8837
8838         return ret ? ret : nr_del;
8839 }
8840
8841 static int find_possible_backrefs(struct btrfs_fs_info *info,
8842                                   struct btrfs_path *path,
8843                                   struct cache_tree *extent_cache,
8844                                   struct extent_record *rec)
8845 {
8846         struct btrfs_root *root;
8847         struct extent_backref *back;
8848         struct data_backref *dback;
8849         struct cache_extent *cache;
8850         struct btrfs_file_extent_item *fi;
8851         struct btrfs_key key;
8852         u64 bytenr, bytes;
8853         int ret;
8854
8855         list_for_each_entry(back, &rec->backrefs, list) {
8856                 /* Don't care about full backrefs (poor unloved backrefs) */
8857                 if (back->full_backref || !back->is_data)
8858                         continue;
8859
8860                 dback = to_data_backref(back);
8861
8862                 /* We found this one, we don't need to do a lookup */
8863                 if (dback->found_ref)
8864                         continue;
8865
8866                 key.objectid = dback->root;
8867                 key.type = BTRFS_ROOT_ITEM_KEY;
8868                 key.offset = (u64)-1;
8869
8870                 root = btrfs_read_fs_root(info, &key);
8871
8872                 /* No root, definitely a bad ref, skip */
8873                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8874                         continue;
8875                 /* Other err, exit */
8876                 if (IS_ERR(root))
8877                         return PTR_ERR(root);
8878
8879                 key.objectid = dback->owner;
8880                 key.type = BTRFS_EXTENT_DATA_KEY;
8881                 key.offset = dback->offset;
8882                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8883                 if (ret) {
8884                         btrfs_release_path(path);
8885                         if (ret < 0)
8886                                 return ret;
8887                         /* Didn't find it, we can carry on */
8888                         ret = 0;
8889                         continue;
8890                 }
8891
8892                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8893                                     struct btrfs_file_extent_item);
8894                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8895                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8896                 btrfs_release_path(path);
8897                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8898                 if (cache) {
8899                         struct extent_record *tmp;
8900                         tmp = container_of(cache, struct extent_record, cache);
8901
8902                         /*
8903                          * If we found an extent record for the bytenr for this
8904                          * particular backref then we can't add it to our
8905                          * current extent record.  We only want to add backrefs
8906                          * that don't have a corresponding extent item in the
8907                          * extent tree since they likely belong to this record
8908                          * and we need to fix it if it doesn't match bytenrs.
8909                          */
8910                         if  (tmp->found_rec)
8911                                 continue;
8912                 }
8913
8914                 dback->found_ref += 1;
8915                 dback->disk_bytenr = bytenr;
8916                 dback->bytes = bytes;
8917
8918                 /*
8919                  * Set this so the verify backref code knows not to trust the
8920                  * values in this backref.
8921                  */
8922                 back->broken = 1;
8923         }
8924
8925         return 0;
8926 }
8927
8928 /*
8929  * Record orphan data ref into corresponding root.
8930  *
8931  * Return 0 if the extent item contains data ref and recorded.
8932  * Return 1 if the extent item contains no useful data ref
8933  *   On that case, it may contains only shared_dataref or metadata backref
8934  *   or the file extent exists(this should be handled by the extent bytenr
8935  *   recovery routine)
8936  * Return <0 if something goes wrong.
8937  */
8938 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8939                                       struct extent_record *rec)
8940 {
8941         struct btrfs_key key;
8942         struct btrfs_root *dest_root;
8943         struct extent_backref *back;
8944         struct data_backref *dback;
8945         struct orphan_data_extent *orphan;
8946         struct btrfs_path path;
8947         int recorded_data_ref = 0;
8948         int ret = 0;
8949
8950         if (rec->metadata)
8951                 return 1;
8952         btrfs_init_path(&path);
8953         list_for_each_entry(back, &rec->backrefs, list) {
8954                 if (back->full_backref || !back->is_data ||
8955                     !back->found_extent_tree)
8956                         continue;
8957                 dback = to_data_backref(back);
8958                 if (dback->found_ref)
8959                         continue;
8960                 key.objectid = dback->root;
8961                 key.type = BTRFS_ROOT_ITEM_KEY;
8962                 key.offset = (u64)-1;
8963
8964                 dest_root = btrfs_read_fs_root(fs_info, &key);
8965
8966                 /* For non-exist root we just skip it */
8967                 if (IS_ERR(dest_root) || !dest_root)
8968                         continue;
8969
8970                 key.objectid = dback->owner;
8971                 key.type = BTRFS_EXTENT_DATA_KEY;
8972                 key.offset = dback->offset;
8973
8974                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8975                 btrfs_release_path(&path);
8976                 /*
8977                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8978                  * we need to record it for inode/file extent rebuild.
8979                  * For ret > 0, we record it only for file extent rebuild.
8980                  * For ret == 0, the file extent exists but only bytenr
8981                  * mismatch, let the original bytenr fix routine to handle,
8982                  * don't record it.
8983                  */
8984                 if (ret == 0)
8985                         continue;
8986                 ret = 0;
8987                 orphan = malloc(sizeof(*orphan));
8988                 if (!orphan) {
8989                         ret = -ENOMEM;
8990                         goto out;
8991                 }
8992                 INIT_LIST_HEAD(&orphan->list);
8993                 orphan->root = dback->root;
8994                 orphan->objectid = dback->owner;
8995                 orphan->offset = dback->offset;
8996                 orphan->disk_bytenr = rec->cache.start;
8997                 orphan->disk_len = rec->cache.size;
8998                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8999                 recorded_data_ref = 1;
9000         }
9001 out:
9002         btrfs_release_path(&path);
9003         if (!ret)
9004                 return !recorded_data_ref;
9005         else
9006                 return ret;
9007 }
9008
9009 /*
9010  * when an incorrect extent item is found, this will delete
9011  * all of the existing entries for it and recreate them
9012  * based on what the tree scan found.
9013  */
9014 static int fixup_extent_refs(struct btrfs_fs_info *info,
9015                              struct cache_tree *extent_cache,
9016                              struct extent_record *rec)
9017 {
9018         struct btrfs_trans_handle *trans = NULL;
9019         int ret;
9020         struct btrfs_path path;
9021         struct list_head *cur = rec->backrefs.next;
9022         struct cache_extent *cache;
9023         struct extent_backref *back;
9024         int allocated = 0;
9025         u64 flags = 0;
9026
9027         if (rec->flag_block_full_backref)
9028                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9029
9030         btrfs_init_path(&path);
9031         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9032                 /*
9033                  * Sometimes the backrefs themselves are so broken they don't
9034                  * get attached to any meaningful rec, so first go back and
9035                  * check any of our backrefs that we couldn't find and throw
9036                  * them into the list if we find the backref so that
9037                  * verify_backrefs can figure out what to do.
9038                  */
9039                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9040                 if (ret < 0)
9041                         goto out;
9042         }
9043
9044         /* step one, make sure all of the backrefs agree */
9045         ret = verify_backrefs(info, &path, rec);
9046         if (ret < 0)
9047                 goto out;
9048
9049         trans = btrfs_start_transaction(info->extent_root, 1);
9050         if (IS_ERR(trans)) {
9051                 ret = PTR_ERR(trans);
9052                 goto out;
9053         }
9054
9055         /* step two, delete all the existing records */
9056         ret = delete_extent_records(trans, info->extent_root, &path,
9057                                     rec->start);
9058
9059         if (ret < 0)
9060                 goto out;
9061
9062         /* was this block corrupt?  If so, don't add references to it */
9063         cache = lookup_cache_extent(info->corrupt_blocks,
9064                                     rec->start, rec->max_size);
9065         if (cache) {
9066                 ret = 0;
9067                 goto out;
9068         }
9069
9070         /* step three, recreate all the refs we did find */
9071         while(cur != &rec->backrefs) {
9072                 back = to_extent_backref(cur);
9073                 cur = cur->next;
9074
9075                 /*
9076                  * if we didn't find any references, don't create a
9077                  * new extent record
9078                  */
9079                 if (!back->found_ref)
9080                         continue;
9081
9082                 rec->bad_full_backref = 0;
9083                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9084                 allocated = 1;
9085
9086                 if (ret)
9087                         goto out;
9088         }
9089 out:
9090         if (trans) {
9091                 int err = btrfs_commit_transaction(trans, info->extent_root);
9092                 if (!ret)
9093                         ret = err;
9094         }
9095
9096         if (!ret)
9097                 fprintf(stderr, "Repaired extent references for %llu\n",
9098                                 (unsigned long long)rec->start);
9099
9100         btrfs_release_path(&path);
9101         return ret;
9102 }
9103
9104 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9105                               struct extent_record *rec)
9106 {
9107         struct btrfs_trans_handle *trans;
9108         struct btrfs_root *root = fs_info->extent_root;
9109         struct btrfs_path path;
9110         struct btrfs_extent_item *ei;
9111         struct btrfs_key key;
9112         u64 flags;
9113         int ret = 0;
9114
9115         key.objectid = rec->start;
9116         if (rec->metadata) {
9117                 key.type = BTRFS_METADATA_ITEM_KEY;
9118                 key.offset = rec->info_level;
9119         } else {
9120                 key.type = BTRFS_EXTENT_ITEM_KEY;
9121                 key.offset = rec->max_size;
9122         }
9123
9124         trans = btrfs_start_transaction(root, 0);
9125         if (IS_ERR(trans))
9126                 return PTR_ERR(trans);
9127
9128         btrfs_init_path(&path);
9129         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9130         if (ret < 0) {
9131                 btrfs_release_path(&path);
9132                 btrfs_commit_transaction(trans, root);
9133                 return ret;
9134         } else if (ret) {
9135                 fprintf(stderr, "Didn't find extent for %llu\n",
9136                         (unsigned long long)rec->start);
9137                 btrfs_release_path(&path);
9138                 btrfs_commit_transaction(trans, root);
9139                 return -ENOENT;
9140         }
9141
9142         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9143                             struct btrfs_extent_item);
9144         flags = btrfs_extent_flags(path.nodes[0], ei);
9145         if (rec->flag_block_full_backref) {
9146                 fprintf(stderr, "setting full backref on %llu\n",
9147                         (unsigned long long)key.objectid);
9148                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9149         } else {
9150                 fprintf(stderr, "clearing full backref on %llu\n",
9151                         (unsigned long long)key.objectid);
9152                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9153         }
9154         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9155         btrfs_mark_buffer_dirty(path.nodes[0]);
9156         btrfs_release_path(&path);
9157         ret = btrfs_commit_transaction(trans, root);
9158         if (!ret)
9159                 fprintf(stderr, "Repaired extent flags for %llu\n",
9160                                 (unsigned long long)rec->start);
9161
9162         return ret;
9163 }
9164
9165 /* right now we only prune from the extent allocation tree */
9166 static int prune_one_block(struct btrfs_trans_handle *trans,
9167                            struct btrfs_fs_info *info,
9168                            struct btrfs_corrupt_block *corrupt)
9169 {
9170         int ret;
9171         struct btrfs_path path;
9172         struct extent_buffer *eb;
9173         u64 found;
9174         int slot;
9175         int nritems;
9176         int level = corrupt->level + 1;
9177
9178         btrfs_init_path(&path);
9179 again:
9180         /* we want to stop at the parent to our busted block */
9181         path.lowest_level = level;
9182
9183         ret = btrfs_search_slot(trans, info->extent_root,
9184                                 &corrupt->key, &path, -1, 1);
9185
9186         if (ret < 0)
9187                 goto out;
9188
9189         eb = path.nodes[level];
9190         if (!eb) {
9191                 ret = -ENOENT;
9192                 goto out;
9193         }
9194
9195         /*
9196          * hopefully the search gave us the block we want to prune,
9197          * lets try that first
9198          */
9199         slot = path.slots[level];
9200         found =  btrfs_node_blockptr(eb, slot);
9201         if (found == corrupt->cache.start)
9202                 goto del_ptr;
9203
9204         nritems = btrfs_header_nritems(eb);
9205
9206         /* the search failed, lets scan this node and hope we find it */
9207         for (slot = 0; slot < nritems; slot++) {
9208                 found =  btrfs_node_blockptr(eb, slot);
9209                 if (found == corrupt->cache.start)
9210                         goto del_ptr;
9211         }
9212         /*
9213          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9214          * to this block
9215          */
9216         if (eb == info->extent_root->node) {
9217                 ret = -ENOENT;
9218                 goto out;
9219         } else {
9220                 level++;
9221                 btrfs_release_path(&path);
9222                 goto again;
9223         }
9224
9225 del_ptr:
9226         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9227         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9228
9229 out:
9230         btrfs_release_path(&path);
9231         return ret;
9232 }
9233
9234 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9235 {
9236         struct btrfs_trans_handle *trans = NULL;
9237         struct cache_extent *cache;
9238         struct btrfs_corrupt_block *corrupt;
9239
9240         while (1) {
9241                 cache = search_cache_extent(info->corrupt_blocks, 0);
9242                 if (!cache)
9243                         break;
9244                 if (!trans) {
9245                         trans = btrfs_start_transaction(info->extent_root, 1);
9246                         if (IS_ERR(trans))
9247                                 return PTR_ERR(trans);
9248                 }
9249                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9250                 prune_one_block(trans, info, corrupt);
9251                 remove_cache_extent(info->corrupt_blocks, cache);
9252         }
9253         if (trans)
9254                 return btrfs_commit_transaction(trans, info->extent_root);
9255         return 0;
9256 }
9257
9258 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9259 {
9260         struct btrfs_block_group_cache *cache;
9261         u64 start, end;
9262         int ret;
9263
9264         while (1) {
9265                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9266                                             &start, &end, EXTENT_DIRTY);
9267                 if (ret)
9268                         break;
9269                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9270         }
9271
9272         start = 0;
9273         while (1) {
9274                 cache = btrfs_lookup_first_block_group(fs_info, start);
9275                 if (!cache)
9276                         break;
9277                 if (cache->cached)
9278                         cache->cached = 0;
9279                 start = cache->key.objectid + cache->key.offset;
9280         }
9281 }
9282
9283 static int check_extent_refs(struct btrfs_root *root,
9284                              struct cache_tree *extent_cache)
9285 {
9286         struct extent_record *rec;
9287         struct cache_extent *cache;
9288         int ret = 0;
9289         int had_dups = 0;
9290
9291         if (repair) {
9292                 /*
9293                  * if we're doing a repair, we have to make sure
9294                  * we don't allocate from the problem extents.
9295                  * In the worst case, this will be all the
9296                  * extents in the FS
9297                  */
9298                 cache = search_cache_extent(extent_cache, 0);
9299                 while(cache) {
9300                         rec = container_of(cache, struct extent_record, cache);
9301                         set_extent_dirty(root->fs_info->excluded_extents,
9302                                          rec->start,
9303                                          rec->start + rec->max_size - 1);
9304                         cache = next_cache_extent(cache);
9305                 }
9306
9307                 /* pin down all the corrupted blocks too */
9308                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9309                 while(cache) {
9310                         set_extent_dirty(root->fs_info->excluded_extents,
9311                                          cache->start,
9312                                          cache->start + cache->size - 1);
9313                         cache = next_cache_extent(cache);
9314                 }
9315                 prune_corrupt_blocks(root->fs_info);
9316                 reset_cached_block_groups(root->fs_info);
9317         }
9318
9319         reset_cached_block_groups(root->fs_info);
9320
9321         /*
9322          * We need to delete any duplicate entries we find first otherwise we
9323          * could mess up the extent tree when we have backrefs that actually
9324          * belong to a different extent item and not the weird duplicate one.
9325          */
9326         while (repair && !list_empty(&duplicate_extents)) {
9327                 rec = to_extent_record(duplicate_extents.next);
9328                 list_del_init(&rec->list);
9329
9330                 /* Sometimes we can find a backref before we find an actual
9331                  * extent, so we need to process it a little bit to see if there
9332                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9333                  * if this is a backref screwup.  If we need to delete stuff
9334                  * process_duplicates() will return 0, otherwise it will return
9335                  * 1 and we
9336                  */
9337                 if (process_duplicates(extent_cache, rec))
9338                         continue;
9339                 ret = delete_duplicate_records(root, rec);
9340                 if (ret < 0)
9341                         return ret;
9342                 /*
9343                  * delete_duplicate_records will return the number of entries
9344                  * deleted, so if it's greater than 0 then we know we actually
9345                  * did something and we need to remove.
9346                  */
9347                 if (ret)
9348                         had_dups = 1;
9349         }
9350
9351         if (had_dups)
9352                 return -EAGAIN;
9353
9354         while(1) {
9355                 int cur_err = 0;
9356                 int fix = 0;
9357
9358                 cache = search_cache_extent(extent_cache, 0);
9359                 if (!cache)
9360                         break;
9361                 rec = container_of(cache, struct extent_record, cache);
9362                 if (rec->num_duplicates) {
9363                         fprintf(stderr, "extent item %llu has multiple extent "
9364                                 "items\n", (unsigned long long)rec->start);
9365                         cur_err = 1;
9366                 }
9367
9368                 if (rec->refs != rec->extent_item_refs) {
9369                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9370                                 (unsigned long long)rec->start,
9371                                 (unsigned long long)rec->nr);
9372                         fprintf(stderr, "extent item %llu, found %llu\n",
9373                                 (unsigned long long)rec->extent_item_refs,
9374                                 (unsigned long long)rec->refs);
9375                         ret = record_orphan_data_extents(root->fs_info, rec);
9376                         if (ret < 0)
9377                                 goto repair_abort;
9378                         fix = ret;
9379                         cur_err = 1;
9380                 }
9381                 if (all_backpointers_checked(rec, 1)) {
9382                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9383                                 (unsigned long long)rec->start,
9384                                 (unsigned long long)rec->nr);
9385                         fix = 1;
9386                         cur_err = 1;
9387                 }
9388                 if (!rec->owner_ref_checked) {
9389                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9390                                 (unsigned long long)rec->start,
9391                                 (unsigned long long)rec->nr);
9392                         fix = 1;
9393                         cur_err = 1;
9394                 }
9395
9396                 if (repair && fix) {
9397                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9398                         if (ret)
9399                                 goto repair_abort;
9400                 }
9401
9402
9403                 if (rec->bad_full_backref) {
9404                         fprintf(stderr, "bad full backref, on [%llu]\n",
9405                                 (unsigned long long)rec->start);
9406                         if (repair) {
9407                                 ret = fixup_extent_flags(root->fs_info, rec);
9408                                 if (ret)
9409                                         goto repair_abort;
9410                                 fix = 1;
9411                         }
9412                         cur_err = 1;
9413                 }
9414                 /*
9415                  * Although it's not a extent ref's problem, we reuse this
9416                  * routine for error reporting.
9417                  * No repair function yet.
9418                  */
9419                 if (rec->crossing_stripes) {
9420                         fprintf(stderr,
9421                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9422                                 rec->start, rec->start + rec->max_size);
9423                         cur_err = 1;
9424                 }
9425
9426                 if (rec->wrong_chunk_type) {
9427                         fprintf(stderr,
9428                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9429                                 rec->start, rec->start + rec->max_size);
9430                         cur_err = 1;
9431                 }
9432
9433                 remove_cache_extent(extent_cache, cache);
9434                 free_all_extent_backrefs(rec);
9435                 if (!init_extent_tree && repair && (!cur_err || fix))
9436                         clear_extent_dirty(root->fs_info->excluded_extents,
9437                                            rec->start,
9438                                            rec->start + rec->max_size - 1);
9439                 free(rec);
9440         }
9441 repair_abort:
9442         if (repair) {
9443                 if (ret && ret != -EAGAIN) {
9444                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9445                         exit(1);
9446                 } else if (!ret) {
9447                         struct btrfs_trans_handle *trans;
9448
9449                         root = root->fs_info->extent_root;
9450                         trans = btrfs_start_transaction(root, 1);
9451                         if (IS_ERR(trans)) {
9452                                 ret = PTR_ERR(trans);
9453                                 goto repair_abort;
9454                         }
9455
9456                         btrfs_fix_block_accounting(trans, root);
9457                         ret = btrfs_commit_transaction(trans, root);
9458                         if (ret)
9459                                 goto repair_abort;
9460                 }
9461                 return ret;
9462         }
9463         return 0;
9464 }
9465
9466 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9467 {
9468         u64 stripe_size;
9469
9470         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9471                 stripe_size = length;
9472                 stripe_size /= num_stripes;
9473         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9474                 stripe_size = length * 2;
9475                 stripe_size /= num_stripes;
9476         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9477                 stripe_size = length;
9478                 stripe_size /= (num_stripes - 1);
9479         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9480                 stripe_size = length;
9481                 stripe_size /= (num_stripes - 2);
9482         } else {
9483                 stripe_size = length;
9484         }
9485         return stripe_size;
9486 }
9487
9488 /*
9489  * Check the chunk with its block group/dev list ref:
9490  * Return 0 if all refs seems valid.
9491  * Return 1 if part of refs seems valid, need later check for rebuild ref
9492  * like missing block group and needs to search extent tree to rebuild them.
9493  * Return -1 if essential refs are missing and unable to rebuild.
9494  */
9495 static int check_chunk_refs(struct chunk_record *chunk_rec,
9496                             struct block_group_tree *block_group_cache,
9497                             struct device_extent_tree *dev_extent_cache,
9498                             int silent)
9499 {
9500         struct cache_extent *block_group_item;
9501         struct block_group_record *block_group_rec;
9502         struct cache_extent *dev_extent_item;
9503         struct device_extent_record *dev_extent_rec;
9504         u64 devid;
9505         u64 offset;
9506         u64 length;
9507         int metadump_v2 = 0;
9508         int i;
9509         int ret = 0;
9510
9511         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9512                                                chunk_rec->offset,
9513                                                chunk_rec->length);
9514         if (block_group_item) {
9515                 block_group_rec = container_of(block_group_item,
9516                                                struct block_group_record,
9517                                                cache);
9518                 if (chunk_rec->length != block_group_rec->offset ||
9519                     chunk_rec->offset != block_group_rec->objectid ||
9520                     (!metadump_v2 &&
9521                      chunk_rec->type_flags != block_group_rec->flags)) {
9522                         if (!silent)
9523                                 fprintf(stderr,
9524                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9525                                         chunk_rec->objectid,
9526                                         chunk_rec->type,
9527                                         chunk_rec->offset,
9528                                         chunk_rec->length,
9529                                         chunk_rec->offset,
9530                                         chunk_rec->type_flags,
9531                                         block_group_rec->objectid,
9532                                         block_group_rec->type,
9533                                         block_group_rec->offset,
9534                                         block_group_rec->offset,
9535                                         block_group_rec->objectid,
9536                                         block_group_rec->flags);
9537                         ret = -1;
9538                 } else {
9539                         list_del_init(&block_group_rec->list);
9540                         chunk_rec->bg_rec = block_group_rec;
9541                 }
9542         } else {
9543                 if (!silent)
9544                         fprintf(stderr,
9545                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9546                                 chunk_rec->objectid,
9547                                 chunk_rec->type,
9548                                 chunk_rec->offset,
9549                                 chunk_rec->length,
9550                                 chunk_rec->offset,
9551                                 chunk_rec->type_flags);
9552                 ret = 1;
9553         }
9554
9555         if (metadump_v2)
9556                 return ret;
9557
9558         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9559                                     chunk_rec->num_stripes);
9560         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9561                 devid = chunk_rec->stripes[i].devid;
9562                 offset = chunk_rec->stripes[i].offset;
9563                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9564                                                        devid, offset, length);
9565                 if (dev_extent_item) {
9566                         dev_extent_rec = container_of(dev_extent_item,
9567                                                 struct device_extent_record,
9568                                                 cache);
9569                         if (dev_extent_rec->objectid != devid ||
9570                             dev_extent_rec->offset != offset ||
9571                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9572                             dev_extent_rec->length != length) {
9573                                 if (!silent)
9574                                         fprintf(stderr,
9575                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9576                                                 chunk_rec->objectid,
9577                                                 chunk_rec->type,
9578                                                 chunk_rec->offset,
9579                                                 chunk_rec->stripes[i].devid,
9580                                                 chunk_rec->stripes[i].offset,
9581                                                 dev_extent_rec->objectid,
9582                                                 dev_extent_rec->offset,
9583                                                 dev_extent_rec->length);
9584                                 ret = -1;
9585                         } else {
9586                                 list_move(&dev_extent_rec->chunk_list,
9587                                           &chunk_rec->dextents);
9588                         }
9589                 } else {
9590                         if (!silent)
9591                                 fprintf(stderr,
9592                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9593                                         chunk_rec->objectid,
9594                                         chunk_rec->type,
9595                                         chunk_rec->offset,
9596                                         chunk_rec->stripes[i].devid,
9597                                         chunk_rec->stripes[i].offset);
9598                         ret = -1;
9599                 }
9600         }
9601         return ret;
9602 }
9603
9604 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9605 int check_chunks(struct cache_tree *chunk_cache,
9606                  struct block_group_tree *block_group_cache,
9607                  struct device_extent_tree *dev_extent_cache,
9608                  struct list_head *good, struct list_head *bad,
9609                  struct list_head *rebuild, int silent)
9610 {
9611         struct cache_extent *chunk_item;
9612         struct chunk_record *chunk_rec;
9613         struct block_group_record *bg_rec;
9614         struct device_extent_record *dext_rec;
9615         int err;
9616         int ret = 0;
9617
9618         chunk_item = first_cache_extent(chunk_cache);
9619         while (chunk_item) {
9620                 chunk_rec = container_of(chunk_item, struct chunk_record,
9621                                          cache);
9622                 err = check_chunk_refs(chunk_rec, block_group_cache,
9623                                        dev_extent_cache, silent);
9624                 if (err < 0)
9625                         ret = err;
9626                 if (err == 0 && good)
9627                         list_add_tail(&chunk_rec->list, good);
9628                 if (err > 0 && rebuild)
9629                         list_add_tail(&chunk_rec->list, rebuild);
9630                 if (err < 0 && bad)
9631                         list_add_tail(&chunk_rec->list, bad);
9632                 chunk_item = next_cache_extent(chunk_item);
9633         }
9634
9635         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9636                 if (!silent)
9637                         fprintf(stderr,
9638                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9639                                 bg_rec->objectid,
9640                                 bg_rec->offset,
9641                                 bg_rec->flags);
9642                 if (!ret)
9643                         ret = 1;
9644         }
9645
9646         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9647                             chunk_list) {
9648                 if (!silent)
9649                         fprintf(stderr,
9650                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9651                                 dext_rec->objectid,
9652                                 dext_rec->offset,
9653                                 dext_rec->length);
9654                 if (!ret)
9655                         ret = 1;
9656         }
9657         return ret;
9658 }
9659
9660
9661 static int check_device_used(struct device_record *dev_rec,
9662                              struct device_extent_tree *dext_cache)
9663 {
9664         struct cache_extent *cache;
9665         struct device_extent_record *dev_extent_rec;
9666         u64 total_byte = 0;
9667
9668         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9669         while (cache) {
9670                 dev_extent_rec = container_of(cache,
9671                                               struct device_extent_record,
9672                                               cache);
9673                 if (dev_extent_rec->objectid != dev_rec->devid)
9674                         break;
9675
9676                 list_del_init(&dev_extent_rec->device_list);
9677                 total_byte += dev_extent_rec->length;
9678                 cache = next_cache_extent(cache);
9679         }
9680
9681         if (total_byte != dev_rec->byte_used) {
9682                 fprintf(stderr,
9683                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9684                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9685                         dev_rec->type, dev_rec->offset);
9686                 return -1;
9687         } else {
9688                 return 0;
9689         }
9690 }
9691
9692 /* check btrfs_dev_item -> btrfs_dev_extent */
9693 static int check_devices(struct rb_root *dev_cache,
9694                          struct device_extent_tree *dev_extent_cache)
9695 {
9696         struct rb_node *dev_node;
9697         struct device_record *dev_rec;
9698         struct device_extent_record *dext_rec;
9699         int err;
9700         int ret = 0;
9701
9702         dev_node = rb_first(dev_cache);
9703         while (dev_node) {
9704                 dev_rec = container_of(dev_node, struct device_record, node);
9705                 err = check_device_used(dev_rec, dev_extent_cache);
9706                 if (err)
9707                         ret = err;
9708
9709                 dev_node = rb_next(dev_node);
9710         }
9711         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9712                             device_list) {
9713                 fprintf(stderr,
9714                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9715                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9716                 if (!ret)
9717                         ret = 1;
9718         }
9719         return ret;
9720 }
9721
9722 static int add_root_item_to_list(struct list_head *head,
9723                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9724                                   u8 level, u8 drop_level,
9725                                   int level_size, struct btrfs_key *drop_key)
9726 {
9727
9728         struct root_item_record *ri_rec;
9729         ri_rec = malloc(sizeof(*ri_rec));
9730         if (!ri_rec)
9731                 return -ENOMEM;
9732         ri_rec->bytenr = bytenr;
9733         ri_rec->objectid = objectid;
9734         ri_rec->level = level;
9735         ri_rec->level_size = level_size;
9736         ri_rec->drop_level = drop_level;
9737         ri_rec->last_snapshot = last_snapshot;
9738         if (drop_key)
9739                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9740         list_add_tail(&ri_rec->list, head);
9741
9742         return 0;
9743 }
9744
9745 static void free_root_item_list(struct list_head *list)
9746 {
9747         struct root_item_record *ri_rec;
9748
9749         while (!list_empty(list)) {
9750                 ri_rec = list_first_entry(list, struct root_item_record,
9751                                           list);
9752                 list_del_init(&ri_rec->list);
9753                 free(ri_rec);
9754         }
9755 }
9756
9757 static int deal_root_from_list(struct list_head *list,
9758                                struct btrfs_root *root,
9759                                struct block_info *bits,
9760                                int bits_nr,
9761                                struct cache_tree *pending,
9762                                struct cache_tree *seen,
9763                                struct cache_tree *reada,
9764                                struct cache_tree *nodes,
9765                                struct cache_tree *extent_cache,
9766                                struct cache_tree *chunk_cache,
9767                                struct rb_root *dev_cache,
9768                                struct block_group_tree *block_group_cache,
9769                                struct device_extent_tree *dev_extent_cache)
9770 {
9771         int ret = 0;
9772         u64 last;
9773
9774         while (!list_empty(list)) {
9775                 struct root_item_record *rec;
9776                 struct extent_buffer *buf;
9777                 rec = list_entry(list->next,
9778                                  struct root_item_record, list);
9779                 last = 0;
9780                 buf = read_tree_block(root->fs_info->tree_root,
9781                                       rec->bytenr, rec->level_size, 0);
9782                 if (!extent_buffer_uptodate(buf)) {
9783                         free_extent_buffer(buf);
9784                         ret = -EIO;
9785                         break;
9786                 }
9787                 ret = add_root_to_pending(buf, extent_cache, pending,
9788                                     seen, nodes, rec->objectid);
9789                 if (ret < 0)
9790                         break;
9791                 /*
9792                  * To rebuild extent tree, we need deal with snapshot
9793                  * one by one, otherwise we deal with node firstly which
9794                  * can maximize readahead.
9795                  */
9796                 while (1) {
9797                         ret = run_next_block(root, bits, bits_nr, &last,
9798                                              pending, seen, reada, nodes,
9799                                              extent_cache, chunk_cache,
9800                                              dev_cache, block_group_cache,
9801                                              dev_extent_cache, rec);
9802                         if (ret != 0)
9803                                 break;
9804                 }
9805                 free_extent_buffer(buf);
9806                 list_del(&rec->list);
9807                 free(rec);
9808                 if (ret < 0)
9809                         break;
9810         }
9811         while (ret >= 0) {
9812                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9813                                      reada, nodes, extent_cache, chunk_cache,
9814                                      dev_cache, block_group_cache,
9815                                      dev_extent_cache, NULL);
9816                 if (ret != 0) {
9817                         if (ret > 0)
9818                                 ret = 0;
9819                         break;
9820                 }
9821         }
9822         return ret;
9823 }
9824
9825 static int check_chunks_and_extents(struct btrfs_root *root)
9826 {
9827         struct rb_root dev_cache;
9828         struct cache_tree chunk_cache;
9829         struct block_group_tree block_group_cache;
9830         struct device_extent_tree dev_extent_cache;
9831         struct cache_tree extent_cache;
9832         struct cache_tree seen;
9833         struct cache_tree pending;
9834         struct cache_tree reada;
9835         struct cache_tree nodes;
9836         struct extent_io_tree excluded_extents;
9837         struct cache_tree corrupt_blocks;
9838         struct btrfs_path path;
9839         struct btrfs_key key;
9840         struct btrfs_key found_key;
9841         int ret, err = 0;
9842         struct block_info *bits;
9843         int bits_nr;
9844         struct extent_buffer *leaf;
9845         int slot;
9846         struct btrfs_root_item ri;
9847         struct list_head dropping_trees;
9848         struct list_head normal_trees;
9849         struct btrfs_root *root1;
9850         u64 objectid;
9851         u32 level_size;
9852         u8 level;
9853
9854         dev_cache = RB_ROOT;
9855         cache_tree_init(&chunk_cache);
9856         block_group_tree_init(&block_group_cache);
9857         device_extent_tree_init(&dev_extent_cache);
9858
9859         cache_tree_init(&extent_cache);
9860         cache_tree_init(&seen);
9861         cache_tree_init(&pending);
9862         cache_tree_init(&nodes);
9863         cache_tree_init(&reada);
9864         cache_tree_init(&corrupt_blocks);
9865         extent_io_tree_init(&excluded_extents);
9866         INIT_LIST_HEAD(&dropping_trees);
9867         INIT_LIST_HEAD(&normal_trees);
9868
9869         if (repair) {
9870                 root->fs_info->excluded_extents = &excluded_extents;
9871                 root->fs_info->fsck_extent_cache = &extent_cache;
9872                 root->fs_info->free_extent_hook = free_extent_hook;
9873                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9874         }
9875
9876         bits_nr = 1024;
9877         bits = malloc(bits_nr * sizeof(struct block_info));
9878         if (!bits) {
9879                 perror("malloc");
9880                 exit(1);
9881         }
9882
9883         if (ctx.progress_enabled) {
9884                 ctx.tp = TASK_EXTENTS;
9885                 task_start(ctx.info);
9886         }
9887
9888 again:
9889         root1 = root->fs_info->tree_root;
9890         level = btrfs_header_level(root1->node);
9891         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9892                                     root1->node->start, 0, level, 0,
9893                                     root1->nodesize, NULL);
9894         if (ret < 0)
9895                 goto out;
9896         root1 = root->fs_info->chunk_root;
9897         level = btrfs_header_level(root1->node);
9898         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9899                                     root1->node->start, 0, level, 0,
9900                                     root1->nodesize, NULL);
9901         if (ret < 0)
9902                 goto out;
9903         btrfs_init_path(&path);
9904         key.offset = 0;
9905         key.objectid = 0;
9906         key.type = BTRFS_ROOT_ITEM_KEY;
9907         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9908                                         &key, &path, 0, 0);
9909         if (ret < 0)
9910                 goto out;
9911         while(1) {
9912                 leaf = path.nodes[0];
9913                 slot = path.slots[0];
9914                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9915                         ret = btrfs_next_leaf(root, &path);
9916                         if (ret != 0)
9917                                 break;
9918                         leaf = path.nodes[0];
9919                         slot = path.slots[0];
9920                 }
9921                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9922                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9923                         unsigned long offset;
9924                         u64 last_snapshot;
9925
9926                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9927                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9928                         last_snapshot = btrfs_root_last_snapshot(&ri);
9929                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9930                                 level = btrfs_root_level(&ri);
9931                                 level_size = root->nodesize;
9932                                 ret = add_root_item_to_list(&normal_trees,
9933                                                 found_key.objectid,
9934                                                 btrfs_root_bytenr(&ri),
9935                                                 last_snapshot, level,
9936                                                 0, level_size, NULL);
9937                                 if (ret < 0)
9938                                         goto out;
9939                         } else {
9940                                 level = btrfs_root_level(&ri);
9941                                 level_size = root->nodesize;
9942                                 objectid = found_key.objectid;
9943                                 btrfs_disk_key_to_cpu(&found_key,
9944                                                       &ri.drop_progress);
9945                                 ret = add_root_item_to_list(&dropping_trees,
9946                                                 objectid,
9947                                                 btrfs_root_bytenr(&ri),
9948                                                 last_snapshot, level,
9949                                                 ri.drop_level,
9950                                                 level_size, &found_key);
9951                                 if (ret < 0)
9952                                         goto out;
9953                         }
9954                 }
9955                 path.slots[0]++;
9956         }
9957         btrfs_release_path(&path);
9958
9959         /*
9960          * check_block can return -EAGAIN if it fixes something, please keep
9961          * this in mind when dealing with return values from these functions, if
9962          * we get -EAGAIN we want to fall through and restart the loop.
9963          */
9964         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9965                                   &seen, &reada, &nodes, &extent_cache,
9966                                   &chunk_cache, &dev_cache, &block_group_cache,
9967                                   &dev_extent_cache);
9968         if (ret < 0) {
9969                 if (ret == -EAGAIN)
9970                         goto loop;
9971                 goto out;
9972         }
9973         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9974                                   &pending, &seen, &reada, &nodes,
9975                                   &extent_cache, &chunk_cache, &dev_cache,
9976                                   &block_group_cache, &dev_extent_cache);
9977         if (ret < 0) {
9978                 if (ret == -EAGAIN)
9979                         goto loop;
9980                 goto out;
9981         }
9982
9983         ret = check_chunks(&chunk_cache, &block_group_cache,
9984                            &dev_extent_cache, NULL, NULL, NULL, 0);
9985         if (ret) {
9986                 if (ret == -EAGAIN)
9987                         goto loop;
9988                 err = ret;
9989         }
9990
9991         ret = check_extent_refs(root, &extent_cache);
9992         if (ret < 0) {
9993                 if (ret == -EAGAIN)
9994                         goto loop;
9995                 goto out;
9996         }
9997
9998         ret = check_devices(&dev_cache, &dev_extent_cache);
9999         if (ret && err)
10000                 ret = err;
10001
10002 out:
10003         task_stop(ctx.info);
10004         if (repair) {
10005                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10006                 extent_io_tree_cleanup(&excluded_extents);
10007                 root->fs_info->fsck_extent_cache = NULL;
10008                 root->fs_info->free_extent_hook = NULL;
10009                 root->fs_info->corrupt_blocks = NULL;
10010                 root->fs_info->excluded_extents = NULL;
10011         }
10012         free(bits);
10013         free_chunk_cache_tree(&chunk_cache);
10014         free_device_cache_tree(&dev_cache);
10015         free_block_group_tree(&block_group_cache);
10016         free_device_extent_tree(&dev_extent_cache);
10017         free_extent_cache_tree(&seen);
10018         free_extent_cache_tree(&pending);
10019         free_extent_cache_tree(&reada);
10020         free_extent_cache_tree(&nodes);
10021         free_root_item_list(&normal_trees);
10022         free_root_item_list(&dropping_trees);
10023         return ret;
10024 loop:
10025         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10026         free_extent_cache_tree(&seen);
10027         free_extent_cache_tree(&pending);
10028         free_extent_cache_tree(&reada);
10029         free_extent_cache_tree(&nodes);
10030         free_chunk_cache_tree(&chunk_cache);
10031         free_block_group_tree(&block_group_cache);
10032         free_device_cache_tree(&dev_cache);
10033         free_device_extent_tree(&dev_extent_cache);
10034         free_extent_record_cache(&extent_cache);
10035         free_root_item_list(&normal_trees);
10036         free_root_item_list(&dropping_trees);
10037         extent_io_tree_cleanup(&excluded_extents);
10038         goto again;
10039 }
10040
10041 /*
10042  * Check backrefs of a tree block given by @bytenr or @eb.
10043  *
10044  * @root:       the root containing the @bytenr or @eb
10045  * @eb:         tree block extent buffer, can be NULL
10046  * @bytenr:     bytenr of the tree block to search
10047  * @level:      tree level of the tree block
10048  * @owner:      owner of the tree block
10049  *
10050  * Return >0 for any error found and output error message
10051  * Return 0 for no error found
10052  */
10053 static int check_tree_block_ref(struct btrfs_root *root,
10054                                 struct extent_buffer *eb, u64 bytenr,
10055                                 int level, u64 owner)
10056 {
10057         struct btrfs_key key;
10058         struct btrfs_root *extent_root = root->fs_info->extent_root;
10059         struct btrfs_path path;
10060         struct btrfs_extent_item *ei;
10061         struct btrfs_extent_inline_ref *iref;
10062         struct extent_buffer *leaf;
10063         unsigned long end;
10064         unsigned long ptr;
10065         int slot;
10066         int skinny_level;
10067         int type;
10068         u32 nodesize = root->nodesize;
10069         u32 item_size;
10070         u64 offset;
10071         int tree_reloc_root = 0;
10072         int found_ref = 0;
10073         int err = 0;
10074         int ret;
10075
10076         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10077             btrfs_header_bytenr(root->node) == bytenr)
10078                 tree_reloc_root = 1;
10079
10080         btrfs_init_path(&path);
10081         key.objectid = bytenr;
10082         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10083                 key.type = BTRFS_METADATA_ITEM_KEY;
10084         else
10085                 key.type = BTRFS_EXTENT_ITEM_KEY;
10086         key.offset = (u64)-1;
10087
10088         /* Search for the backref in extent tree */
10089         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10090         if (ret < 0) {
10091                 err |= BACKREF_MISSING;
10092                 goto out;
10093         }
10094         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10095         if (ret) {
10096                 err |= BACKREF_MISSING;
10097                 goto out;
10098         }
10099
10100         leaf = path.nodes[0];
10101         slot = path.slots[0];
10102         btrfs_item_key_to_cpu(leaf, &key, slot);
10103
10104         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10105
10106         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10107                 skinny_level = (int)key.offset;
10108                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10109         } else {
10110                 struct btrfs_tree_block_info *info;
10111
10112                 info = (struct btrfs_tree_block_info *)(ei + 1);
10113                 skinny_level = btrfs_tree_block_level(leaf, info);
10114                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10115         }
10116
10117         if (eb) {
10118                 u64 header_gen;
10119                 u64 extent_gen;
10120
10121                 if (!(btrfs_extent_flags(leaf, ei) &
10122                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10123                         error(
10124                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10125                                 key.objectid, nodesize,
10126                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10127                         err = BACKREF_MISMATCH;
10128                 }
10129                 header_gen = btrfs_header_generation(eb);
10130                 extent_gen = btrfs_extent_generation(leaf, ei);
10131                 if (header_gen != extent_gen) {
10132                         error(
10133         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10134                                 key.objectid, nodesize, header_gen,
10135                                 extent_gen);
10136                         err = BACKREF_MISMATCH;
10137                 }
10138                 if (level != skinny_level) {
10139                         error(
10140                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10141                                 key.objectid, nodesize, level, skinny_level);
10142                         err = BACKREF_MISMATCH;
10143                 }
10144                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10145                         error(
10146                         "extent[%llu %u] is referred by other roots than %llu",
10147                                 key.objectid, nodesize, root->objectid);
10148                         err = BACKREF_MISMATCH;
10149                 }
10150         }
10151
10152         /*
10153          * Iterate the extent/metadata item to find the exact backref
10154          */
10155         item_size = btrfs_item_size_nr(leaf, slot);
10156         ptr = (unsigned long)iref;
10157         end = (unsigned long)ei + item_size;
10158         while (ptr < end) {
10159                 iref = (struct btrfs_extent_inline_ref *)ptr;
10160                 type = btrfs_extent_inline_ref_type(leaf, iref);
10161                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10162
10163                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10164                         (offset == root->objectid || offset == owner)) {
10165                         found_ref = 1;
10166                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10167                         /*
10168                          * Backref of tree reloc root points to itself, no need
10169                          * to check backref any more.
10170                          */
10171                         if (tree_reloc_root)
10172                                 found_ref = 1;
10173                         else
10174                         /* Check if the backref points to valid referencer */
10175                                 found_ref = !check_tree_block_ref(root, NULL,
10176                                                 offset, level + 1, owner);
10177                 }
10178
10179                 if (found_ref)
10180                         break;
10181                 ptr += btrfs_extent_inline_ref_size(type);
10182         }
10183
10184         /*
10185          * Inlined extent item doesn't have what we need, check
10186          * TREE_BLOCK_REF_KEY
10187          */
10188         if (!found_ref) {
10189                 btrfs_release_path(&path);
10190                 key.objectid = bytenr;
10191                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10192                 key.offset = root->objectid;
10193
10194                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10195                 if (!ret)
10196                         found_ref = 1;
10197         }
10198         if (!found_ref)
10199                 err |= BACKREF_MISSING;
10200 out:
10201         btrfs_release_path(&path);
10202         if (eb && (err & BACKREF_MISSING))
10203                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10204                         bytenr, nodesize, owner, level);
10205         return err;
10206 }
10207
10208 /*
10209  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10210  *
10211  * Return >0 any error found and output error message
10212  * Return 0 for no error found
10213  */
10214 static int check_extent_data_item(struct btrfs_root *root,
10215                                   struct extent_buffer *eb, int slot)
10216 {
10217         struct btrfs_file_extent_item *fi;
10218         struct btrfs_path path;
10219         struct btrfs_root *extent_root = root->fs_info->extent_root;
10220         struct btrfs_key fi_key;
10221         struct btrfs_key dbref_key;
10222         struct extent_buffer *leaf;
10223         struct btrfs_extent_item *ei;
10224         struct btrfs_extent_inline_ref *iref;
10225         struct btrfs_extent_data_ref *dref;
10226         u64 owner;
10227         u64 disk_bytenr;
10228         u64 disk_num_bytes;
10229         u64 extent_num_bytes;
10230         u64 extent_flags;
10231         u32 item_size;
10232         unsigned long end;
10233         unsigned long ptr;
10234         int type;
10235         u64 ref_root;
10236         int found_dbackref = 0;
10237         int err = 0;
10238         int ret;
10239
10240         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10241         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10242
10243         /* Nothing to check for hole and inline data extents */
10244         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10245             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10246                 return 0;
10247
10248         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10249         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10250         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10251
10252         /* Check unaligned disk_num_bytes and num_bytes */
10253         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10254                 error(
10255 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10256                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10257                         root->sectorsize);
10258                 err |= BYTES_UNALIGNED;
10259         } else {
10260                 data_bytes_allocated += disk_num_bytes;
10261         }
10262         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10263                 error(
10264 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10265                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10266                         root->sectorsize);
10267                 err |= BYTES_UNALIGNED;
10268         } else {
10269                 data_bytes_referenced += extent_num_bytes;
10270         }
10271         owner = btrfs_header_owner(eb);
10272
10273         /* Check the extent item of the file extent in extent tree */
10274         btrfs_init_path(&path);
10275         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10276         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10277         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10278
10279         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10280         if (ret)
10281                 goto out;
10282
10283         leaf = path.nodes[0];
10284         slot = path.slots[0];
10285         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10286
10287         extent_flags = btrfs_extent_flags(leaf, ei);
10288
10289         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10290                 error(
10291                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10292                     disk_bytenr, disk_num_bytes,
10293                     BTRFS_EXTENT_FLAG_DATA);
10294                 err |= BACKREF_MISMATCH;
10295         }
10296
10297         /* Check data backref inside that extent item */
10298         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10299         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10300         ptr = (unsigned long)iref;
10301         end = (unsigned long)ei + item_size;
10302         while (ptr < end) {
10303                 iref = (struct btrfs_extent_inline_ref *)ptr;
10304                 type = btrfs_extent_inline_ref_type(leaf, iref);
10305                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10306
10307                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10308                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10309                         if (ref_root == owner || ref_root == root->objectid)
10310                                 found_dbackref = 1;
10311                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10312                         found_dbackref = !check_tree_block_ref(root, NULL,
10313                                 btrfs_extent_inline_ref_offset(leaf, iref),
10314                                 0, owner);
10315                 }
10316
10317                 if (found_dbackref)
10318                         break;
10319                 ptr += btrfs_extent_inline_ref_size(type);
10320         }
10321
10322         if (!found_dbackref) {
10323                 btrfs_release_path(&path);
10324
10325                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10326                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10327                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10328                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10329                                 fi_key.objectid, fi_key.offset);
10330
10331                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10332                                         &dbref_key, &path, 0, 0);
10333                 if (!ret) {
10334                         found_dbackref = 1;
10335                         goto out;
10336                 }
10337
10338                 btrfs_release_path(&path);
10339
10340                 /*
10341                  * Neither inlined nor EXTENT_DATA_REF found, try
10342                  * SHARED_DATA_REF as last chance.
10343                  */
10344                 dbref_key.objectid = disk_bytenr;
10345                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10346                 dbref_key.offset = eb->start;
10347
10348                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10349                                         &dbref_key, &path, 0, 0);
10350                 if (!ret) {
10351                         found_dbackref = 1;
10352                         goto out;
10353                 }
10354         }
10355
10356 out:
10357         if (!found_dbackref)
10358                 err |= BACKREF_MISSING;
10359         btrfs_release_path(&path);
10360         if (err & BACKREF_MISSING) {
10361                 error("data extent[%llu %llu] backref lost",
10362                       disk_bytenr, disk_num_bytes);
10363         }
10364         return err;
10365 }
10366
10367 /*
10368  * Get real tree block level for the case like shared block
10369  * Return >= 0 as tree level
10370  * Return <0 for error
10371  */
10372 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10373 {
10374         struct extent_buffer *eb;
10375         struct btrfs_path path;
10376         struct btrfs_key key;
10377         struct btrfs_extent_item *ei;
10378         u64 flags;
10379         u64 transid;
10380         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10381         u8 backref_level;
10382         u8 header_level;
10383         int ret;
10384
10385         /* Search extent tree for extent generation and level */
10386         key.objectid = bytenr;
10387         key.type = BTRFS_METADATA_ITEM_KEY;
10388         key.offset = (u64)-1;
10389
10390         btrfs_init_path(&path);
10391         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10392         if (ret < 0)
10393                 goto release_out;
10394         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10395         if (ret < 0)
10396                 goto release_out;
10397         if (ret > 0) {
10398                 ret = -ENOENT;
10399                 goto release_out;
10400         }
10401
10402         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10403         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10404                             struct btrfs_extent_item);
10405         flags = btrfs_extent_flags(path.nodes[0], ei);
10406         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10407                 ret = -ENOENT;
10408                 goto release_out;
10409         }
10410
10411         /* Get transid for later read_tree_block() check */
10412         transid = btrfs_extent_generation(path.nodes[0], ei);
10413
10414         /* Get backref level as one source */
10415         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10416                 backref_level = key.offset;
10417         } else {
10418                 struct btrfs_tree_block_info *info;
10419
10420                 info = (struct btrfs_tree_block_info *)(ei + 1);
10421                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10422         }
10423         btrfs_release_path(&path);
10424
10425         /* Get level from tree block as an alternative source */
10426         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10427         if (!extent_buffer_uptodate(eb)) {
10428                 free_extent_buffer(eb);
10429                 return -EIO;
10430         }
10431         header_level = btrfs_header_level(eb);
10432         free_extent_buffer(eb);
10433
10434         if (header_level != backref_level)
10435                 return -EIO;
10436         return header_level;
10437
10438 release_out:
10439         btrfs_release_path(&path);
10440         return ret;
10441 }
10442
10443 /*
10444  * Check if a tree block backref is valid (points to a valid tree block)
10445  * if level == -1, level will be resolved
10446  * Return >0 for any error found and print error message
10447  */
10448 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10449                                     u64 bytenr, int level)
10450 {
10451         struct btrfs_root *root;
10452         struct btrfs_key key;
10453         struct btrfs_path path;
10454         struct extent_buffer *eb;
10455         struct extent_buffer *node;
10456         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10457         int err = 0;
10458         int ret;
10459
10460         /* Query level for level == -1 special case */
10461         if (level == -1)
10462                 level = query_tree_block_level(fs_info, bytenr);
10463         if (level < 0) {
10464                 err |= REFERENCER_MISSING;
10465                 goto out;
10466         }
10467
10468         key.objectid = root_id;
10469         key.type = BTRFS_ROOT_ITEM_KEY;
10470         key.offset = (u64)-1;
10471
10472         root = btrfs_read_fs_root(fs_info, &key);
10473         if (IS_ERR(root)) {
10474                 err |= REFERENCER_MISSING;
10475                 goto out;
10476         }
10477
10478         /* Read out the tree block to get item/node key */
10479         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10480         if (!extent_buffer_uptodate(eb)) {
10481                 err |= REFERENCER_MISSING;
10482                 free_extent_buffer(eb);
10483                 goto out;
10484         }
10485
10486         /* Empty tree, no need to check key */
10487         if (!btrfs_header_nritems(eb) && !level) {
10488                 free_extent_buffer(eb);
10489                 goto out;
10490         }
10491
10492         if (level)
10493                 btrfs_node_key_to_cpu(eb, &key, 0);
10494         else
10495                 btrfs_item_key_to_cpu(eb, &key, 0);
10496
10497         free_extent_buffer(eb);
10498
10499         btrfs_init_path(&path);
10500         path.lowest_level = level;
10501         /* Search with the first key, to ensure we can reach it */
10502         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10503         if (ret < 0) {
10504                 err |= REFERENCER_MISSING;
10505                 goto release_out;
10506         }
10507
10508         node = path.nodes[level];
10509         if (btrfs_header_bytenr(node) != bytenr) {
10510                 error(
10511         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10512                         bytenr, nodesize, bytenr,
10513                         btrfs_header_bytenr(node));
10514                 err |= REFERENCER_MISMATCH;
10515         }
10516         if (btrfs_header_level(node) != level) {
10517                 error(
10518         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10519                         bytenr, nodesize, level,
10520                         btrfs_header_level(node));
10521                 err |= REFERENCER_MISMATCH;
10522         }
10523
10524 release_out:
10525         btrfs_release_path(&path);
10526 out:
10527         if (err & REFERENCER_MISSING) {
10528                 if (level < 0)
10529                         error("extent [%llu %d] lost referencer (owner: %llu)",
10530                                 bytenr, nodesize, root_id);
10531                 else
10532                         error(
10533                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10534                                 bytenr, nodesize, root_id, level);
10535         }
10536
10537         return err;
10538 }
10539
10540 /*
10541  * Check if tree block @eb is tree reloc root.
10542  * Return 0 if it's not or any problem happens
10543  * Return 1 if it's a tree reloc root
10544  */
10545 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10546                                  struct extent_buffer *eb)
10547 {
10548         struct btrfs_root *tree_reloc_root;
10549         struct btrfs_key key;
10550         u64 bytenr = btrfs_header_bytenr(eb);
10551         u64 owner = btrfs_header_owner(eb);
10552         int ret = 0;
10553
10554         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10555         key.offset = owner;
10556         key.type = BTRFS_ROOT_ITEM_KEY;
10557
10558         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10559         if (IS_ERR(tree_reloc_root))
10560                 return 0;
10561
10562         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10563                 ret = 1;
10564         btrfs_free_fs_root(tree_reloc_root);
10565         return ret;
10566 }
10567
10568 /*
10569  * Check referencer for shared block backref
10570  * If level == -1, this function will resolve the level.
10571  */
10572 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10573                                      u64 parent, u64 bytenr, int level)
10574 {
10575         struct extent_buffer *eb;
10576         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10577         u32 nr;
10578         int found_parent = 0;
10579         int i;
10580
10581         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10582         if (!extent_buffer_uptodate(eb))
10583                 goto out;
10584
10585         if (level == -1)
10586                 level = query_tree_block_level(fs_info, bytenr);
10587         if (level < 0)
10588                 goto out;
10589
10590         /* It's possible it's a tree reloc root */
10591         if (parent == bytenr) {
10592                 if (is_tree_reloc_root(fs_info, eb))
10593                         found_parent = 1;
10594                 goto out;
10595         }
10596
10597         if (level + 1 != btrfs_header_level(eb))
10598                 goto out;
10599
10600         nr = btrfs_header_nritems(eb);
10601         for (i = 0; i < nr; i++) {
10602                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10603                         found_parent = 1;
10604                         break;
10605                 }
10606         }
10607 out:
10608         free_extent_buffer(eb);
10609         if (!found_parent) {
10610                 error(
10611         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10612                         bytenr, nodesize, parent, level);
10613                 return REFERENCER_MISSING;
10614         }
10615         return 0;
10616 }
10617
10618 /*
10619  * Check referencer for normal (inlined) data ref
10620  * If len == 0, it will be resolved by searching in extent tree
10621  */
10622 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10623                                      u64 root_id, u64 objectid, u64 offset,
10624                                      u64 bytenr, u64 len, u32 count)
10625 {
10626         struct btrfs_root *root;
10627         struct btrfs_root *extent_root = fs_info->extent_root;
10628         struct btrfs_key key;
10629         struct btrfs_path path;
10630         struct extent_buffer *leaf;
10631         struct btrfs_file_extent_item *fi;
10632         u32 found_count = 0;
10633         int slot;
10634         int ret = 0;
10635
10636         if (!len) {
10637                 key.objectid = bytenr;
10638                 key.type = BTRFS_EXTENT_ITEM_KEY;
10639                 key.offset = (u64)-1;
10640
10641                 btrfs_init_path(&path);
10642                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10643                 if (ret < 0)
10644                         goto out;
10645                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10646                 if (ret)
10647                         goto out;
10648                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10649                 if (key.objectid != bytenr ||
10650                     key.type != BTRFS_EXTENT_ITEM_KEY)
10651                         goto out;
10652                 len = key.offset;
10653                 btrfs_release_path(&path);
10654         }
10655         key.objectid = root_id;
10656         key.type = BTRFS_ROOT_ITEM_KEY;
10657         key.offset = (u64)-1;
10658         btrfs_init_path(&path);
10659
10660         root = btrfs_read_fs_root(fs_info, &key);
10661         if (IS_ERR(root))
10662                 goto out;
10663
10664         key.objectid = objectid;
10665         key.type = BTRFS_EXTENT_DATA_KEY;
10666         /*
10667          * It can be nasty as data backref offset is
10668          * file offset - file extent offset, which is smaller or
10669          * equal to original backref offset.  The only special case is
10670          * overflow.  So we need to special check and do further search.
10671          */
10672         key.offset = offset & (1ULL << 63) ? 0 : offset;
10673
10674         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10675         if (ret < 0)
10676                 goto out;
10677
10678         /*
10679          * Search afterwards to get correct one
10680          * NOTE: As we must do a comprehensive check on the data backref to
10681          * make sure the dref count also matches, we must iterate all file
10682          * extents for that inode.
10683          */
10684         while (1) {
10685                 leaf = path.nodes[0];
10686                 slot = path.slots[0];
10687
10688                 if (slot >= btrfs_header_nritems(leaf))
10689                         goto next;
10690                 btrfs_item_key_to_cpu(leaf, &key, slot);
10691                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10692                         break;
10693                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10694                 /*
10695                  * Except normal disk bytenr and disk num bytes, we still
10696                  * need to do extra check on dbackref offset as
10697                  * dbackref offset = file_offset - file_extent_offset
10698                  */
10699                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10700                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10701                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10702                     offset)
10703                         found_count++;
10704
10705 next:
10706                 ret = btrfs_next_item(root, &path);
10707                 if (ret)
10708                         break;
10709         }
10710 out:
10711         btrfs_release_path(&path);
10712         if (found_count != count) {
10713                 error(
10714 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10715                         bytenr, len, root_id, objectid, offset, count, found_count);
10716                 return REFERENCER_MISSING;
10717         }
10718         return 0;
10719 }
10720
10721 /*
10722  * Check if the referencer of a shared data backref exists
10723  */
10724 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10725                                      u64 parent, u64 bytenr)
10726 {
10727         struct extent_buffer *eb;
10728         struct btrfs_key key;
10729         struct btrfs_file_extent_item *fi;
10730         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10731         u32 nr;
10732         int found_parent = 0;
10733         int i;
10734
10735         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10736         if (!extent_buffer_uptodate(eb))
10737                 goto out;
10738
10739         nr = btrfs_header_nritems(eb);
10740         for (i = 0; i < nr; i++) {
10741                 btrfs_item_key_to_cpu(eb, &key, i);
10742                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10743                         continue;
10744
10745                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10746                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10747                         continue;
10748
10749                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10750                         found_parent = 1;
10751                         break;
10752                 }
10753         }
10754
10755 out:
10756         free_extent_buffer(eb);
10757         if (!found_parent) {
10758                 error("shared extent %llu referencer lost (parent: %llu)",
10759                         bytenr, parent);
10760                 return REFERENCER_MISSING;
10761         }
10762         return 0;
10763 }
10764
10765 /*
10766  * This function will check a given extent item, including its backref and
10767  * itself (like crossing stripe boundary and type)
10768  *
10769  * Since we don't use extent_record anymore, introduce new error bit
10770  */
10771 static int check_extent_item(struct btrfs_fs_info *fs_info,
10772                              struct extent_buffer *eb, int slot)
10773 {
10774         struct btrfs_extent_item *ei;
10775         struct btrfs_extent_inline_ref *iref;
10776         struct btrfs_extent_data_ref *dref;
10777         unsigned long end;
10778         unsigned long ptr;
10779         int type;
10780         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10781         u32 item_size = btrfs_item_size_nr(eb, slot);
10782         u64 flags;
10783         u64 offset;
10784         int metadata = 0;
10785         int level;
10786         struct btrfs_key key;
10787         int ret;
10788         int err = 0;
10789
10790         btrfs_item_key_to_cpu(eb, &key, slot);
10791         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10792                 bytes_used += key.offset;
10793         else
10794                 bytes_used += nodesize;
10795
10796         if (item_size < sizeof(*ei)) {
10797                 /*
10798                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10799                  * old thing when on disk format is still un-determined.
10800                  * No need to care about it anymore
10801                  */
10802                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10803                 return -ENOTTY;
10804         }
10805
10806         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10807         flags = btrfs_extent_flags(eb, ei);
10808
10809         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10810                 metadata = 1;
10811         if (metadata && check_crossing_stripes(global_info, key.objectid,
10812                                                eb->len)) {
10813                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10814                       key.objectid, key.objectid + nodesize);
10815                 err |= CROSSING_STRIPE_BOUNDARY;
10816         }
10817
10818         ptr = (unsigned long)(ei + 1);
10819
10820         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10821                 /* Old EXTENT_ITEM metadata */
10822                 struct btrfs_tree_block_info *info;
10823
10824                 info = (struct btrfs_tree_block_info *)ptr;
10825                 level = btrfs_tree_block_level(eb, info);
10826                 ptr += sizeof(struct btrfs_tree_block_info);
10827         } else {
10828                 /* New METADATA_ITEM */
10829                 level = key.offset;
10830         }
10831         end = (unsigned long)ei + item_size;
10832
10833 next:
10834         /* Reached extent item end normally */
10835         if (ptr == end)
10836                 goto out;
10837
10838         /* Beyond extent item end, wrong item size */
10839         if (ptr > end) {
10840                 err |= ITEM_SIZE_MISMATCH;
10841                 error("extent item at bytenr %llu slot %d has wrong size",
10842                         eb->start, slot);
10843                 goto out;
10844         }
10845
10846         /* Now check every backref in this extent item */
10847         iref = (struct btrfs_extent_inline_ref *)ptr;
10848         type = btrfs_extent_inline_ref_type(eb, iref);
10849         offset = btrfs_extent_inline_ref_offset(eb, iref);
10850         switch (type) {
10851         case BTRFS_TREE_BLOCK_REF_KEY:
10852                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10853                                                level);
10854                 err |= ret;
10855                 break;
10856         case BTRFS_SHARED_BLOCK_REF_KEY:
10857                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10858                                                  level);
10859                 err |= ret;
10860                 break;
10861         case BTRFS_EXTENT_DATA_REF_KEY:
10862                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10863                 ret = check_extent_data_backref(fs_info,
10864                                 btrfs_extent_data_ref_root(eb, dref),
10865                                 btrfs_extent_data_ref_objectid(eb, dref),
10866                                 btrfs_extent_data_ref_offset(eb, dref),
10867                                 key.objectid, key.offset,
10868                                 btrfs_extent_data_ref_count(eb, dref));
10869                 err |= ret;
10870                 break;
10871         case BTRFS_SHARED_DATA_REF_KEY:
10872                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10873                 err |= ret;
10874                 break;
10875         default:
10876                 error("extent[%llu %d %llu] has unknown ref type: %d",
10877                         key.objectid, key.type, key.offset, type);
10878                 err |= UNKNOWN_TYPE;
10879                 goto out;
10880         }
10881
10882         ptr += btrfs_extent_inline_ref_size(type);
10883         goto next;
10884
10885 out:
10886         return err;
10887 }
10888
10889 /*
10890  * Check if a dev extent item is referred correctly by its chunk
10891  */
10892 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10893                                  struct extent_buffer *eb, int slot)
10894 {
10895         struct btrfs_root *chunk_root = fs_info->chunk_root;
10896         struct btrfs_dev_extent *ptr;
10897         struct btrfs_path path;
10898         struct btrfs_key chunk_key;
10899         struct btrfs_key devext_key;
10900         struct btrfs_chunk *chunk;
10901         struct extent_buffer *l;
10902         int num_stripes;
10903         u64 length;
10904         int i;
10905         int found_chunk = 0;
10906         int ret;
10907
10908         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10909         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10910         length = btrfs_dev_extent_length(eb, ptr);
10911
10912         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10913         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10914         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10915
10916         btrfs_init_path(&path);
10917         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10918         if (ret)
10919                 goto out;
10920
10921         l = path.nodes[0];
10922         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10923         if (btrfs_chunk_length(l, chunk) != length)
10924                 goto out;
10925
10926         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10927         for (i = 0; i < num_stripes; i++) {
10928                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10929                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10930
10931                 if (devid == devext_key.objectid &&
10932                     offset == devext_key.offset) {
10933                         found_chunk = 1;
10934                         break;
10935                 }
10936         }
10937 out:
10938         btrfs_release_path(&path);
10939         if (!found_chunk) {
10940                 error(
10941                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10942                         devext_key.objectid, devext_key.offset, length);
10943                 return REFERENCER_MISSING;
10944         }
10945         return 0;
10946 }
10947
10948 /*
10949  * Check if the used space is correct with the dev item
10950  */
10951 static int check_dev_item(struct btrfs_fs_info *fs_info,
10952                           struct extent_buffer *eb, int slot)
10953 {
10954         struct btrfs_root *dev_root = fs_info->dev_root;
10955         struct btrfs_dev_item *dev_item;
10956         struct btrfs_path path;
10957         struct btrfs_key key;
10958         struct btrfs_dev_extent *ptr;
10959         u64 dev_id;
10960         u64 used;
10961         u64 total = 0;
10962         int ret;
10963
10964         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10965         dev_id = btrfs_device_id(eb, dev_item);
10966         used = btrfs_device_bytes_used(eb, dev_item);
10967
10968         key.objectid = dev_id;
10969         key.type = BTRFS_DEV_EXTENT_KEY;
10970         key.offset = 0;
10971
10972         btrfs_init_path(&path);
10973         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10974         if (ret < 0) {
10975                 btrfs_item_key_to_cpu(eb, &key, slot);
10976                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10977                         key.objectid, key.type, key.offset);
10978                 btrfs_release_path(&path);
10979                 return REFERENCER_MISSING;
10980         }
10981
10982         /* Iterate dev_extents to calculate the used space of a device */
10983         while (1) {
10984                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10985                         goto next;
10986
10987                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10988                 if (key.objectid > dev_id)
10989                         break;
10990                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10991                         goto next;
10992
10993                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10994                                      struct btrfs_dev_extent);
10995                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10996 next:
10997                 ret = btrfs_next_item(dev_root, &path);
10998                 if (ret)
10999                         break;
11000         }
11001         btrfs_release_path(&path);
11002
11003         if (used != total) {
11004                 btrfs_item_key_to_cpu(eb, &key, slot);
11005                 error(
11006 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11007                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11008                         BTRFS_DEV_EXTENT_KEY, dev_id);
11009                 return ACCOUNTING_MISMATCH;
11010         }
11011         return 0;
11012 }
11013
11014 /*
11015  * Check a block group item with its referener (chunk) and its used space
11016  * with extent/metadata item
11017  */
11018 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11019                                   struct extent_buffer *eb, int slot)
11020 {
11021         struct btrfs_root *extent_root = fs_info->extent_root;
11022         struct btrfs_root *chunk_root = fs_info->chunk_root;
11023         struct btrfs_block_group_item *bi;
11024         struct btrfs_block_group_item bg_item;
11025         struct btrfs_path path;
11026         struct btrfs_key bg_key;
11027         struct btrfs_key chunk_key;
11028         struct btrfs_key extent_key;
11029         struct btrfs_chunk *chunk;
11030         struct extent_buffer *leaf;
11031         struct btrfs_extent_item *ei;
11032         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11033         u64 flags;
11034         u64 bg_flags;
11035         u64 used;
11036         u64 total = 0;
11037         int ret;
11038         int err = 0;
11039
11040         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11041         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11042         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11043         used = btrfs_block_group_used(&bg_item);
11044         bg_flags = btrfs_block_group_flags(&bg_item);
11045
11046         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11047         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11048         chunk_key.offset = bg_key.objectid;
11049
11050         btrfs_init_path(&path);
11051         /* Search for the referencer chunk */
11052         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11053         if (ret) {
11054                 error(
11055                 "block group[%llu %llu] did not find the related chunk item",
11056                         bg_key.objectid, bg_key.offset);
11057                 err |= REFERENCER_MISSING;
11058         } else {
11059                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11060                                         struct btrfs_chunk);
11061                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11062                                                 bg_key.offset) {
11063                         error(
11064         "block group[%llu %llu] related chunk item length does not match",
11065                                 bg_key.objectid, bg_key.offset);
11066                         err |= REFERENCER_MISMATCH;
11067                 }
11068         }
11069         btrfs_release_path(&path);
11070
11071         /* Search from the block group bytenr */
11072         extent_key.objectid = bg_key.objectid;
11073         extent_key.type = 0;
11074         extent_key.offset = 0;
11075
11076         btrfs_init_path(&path);
11077         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11078         if (ret < 0)
11079                 goto out;
11080
11081         /* Iterate extent tree to account used space */
11082         while (1) {
11083                 leaf = path.nodes[0];
11084
11085                 /* Search slot can point to the last item beyond leaf nritems */
11086                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11087                         goto next;
11088
11089                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11090                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11091                         break;
11092
11093                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11094                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11095                         goto next;
11096                 if (extent_key.objectid < bg_key.objectid)
11097                         goto next;
11098
11099                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11100                         total += nodesize;
11101                 else
11102                         total += extent_key.offset;
11103
11104                 ei = btrfs_item_ptr(leaf, path.slots[0],
11105                                     struct btrfs_extent_item);
11106                 flags = btrfs_extent_flags(leaf, ei);
11107                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11108                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11109                                 error(
11110                         "bad extent[%llu, %llu) type mismatch with chunk",
11111                                         extent_key.objectid,
11112                                         extent_key.objectid + extent_key.offset);
11113                                 err |= CHUNK_TYPE_MISMATCH;
11114                         }
11115                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11116                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11117                                     BTRFS_BLOCK_GROUP_METADATA))) {
11118                                 error(
11119                         "bad extent[%llu, %llu) type mismatch with chunk",
11120                                         extent_key.objectid,
11121                                         extent_key.objectid + nodesize);
11122                                 err |= CHUNK_TYPE_MISMATCH;
11123                         }
11124                 }
11125 next:
11126                 ret = btrfs_next_item(extent_root, &path);
11127                 if (ret)
11128                         break;
11129         }
11130
11131 out:
11132         btrfs_release_path(&path);
11133
11134         if (total != used) {
11135                 error(
11136                 "block group[%llu %llu] used %llu but extent items used %llu",
11137                         bg_key.objectid, bg_key.offset, used, total);
11138                 err |= ACCOUNTING_MISMATCH;
11139         }
11140         return err;
11141 }
11142
11143 /*
11144  * Check a chunk item.
11145  * Including checking all referred dev_extents and block group
11146  */
11147 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11148                             struct extent_buffer *eb, int slot)
11149 {
11150         struct btrfs_root *extent_root = fs_info->extent_root;
11151         struct btrfs_root *dev_root = fs_info->dev_root;
11152         struct btrfs_path path;
11153         struct btrfs_key chunk_key;
11154         struct btrfs_key bg_key;
11155         struct btrfs_key devext_key;
11156         struct btrfs_chunk *chunk;
11157         struct extent_buffer *leaf;
11158         struct btrfs_block_group_item *bi;
11159         struct btrfs_block_group_item bg_item;
11160         struct btrfs_dev_extent *ptr;
11161         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11162         u64 length;
11163         u64 chunk_end;
11164         u64 type;
11165         u64 profile;
11166         int num_stripes;
11167         u64 offset;
11168         u64 objectid;
11169         int i;
11170         int ret;
11171         int err = 0;
11172
11173         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11174         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11175         length = btrfs_chunk_length(eb, chunk);
11176         chunk_end = chunk_key.offset + length;
11177         if (!IS_ALIGNED(length, sectorsize)) {
11178                 error("chunk[%llu %llu) not aligned to %u",
11179                         chunk_key.offset, chunk_end, sectorsize);
11180                 err |= BYTES_UNALIGNED;
11181                 goto out;
11182         }
11183
11184         type = btrfs_chunk_type(eb, chunk);
11185         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11186         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11187                 error("chunk[%llu %llu) has no chunk type",
11188                         chunk_key.offset, chunk_end);
11189                 err |= UNKNOWN_TYPE;
11190         }
11191         if (profile && (profile & (profile - 1))) {
11192                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11193                         chunk_key.offset, chunk_end, profile);
11194                 err |= UNKNOWN_TYPE;
11195         }
11196
11197         bg_key.objectid = chunk_key.offset;
11198         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11199         bg_key.offset = length;
11200
11201         btrfs_init_path(&path);
11202         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11203         if (ret) {
11204                 error(
11205                 "chunk[%llu %llu) did not find the related block group item",
11206                         chunk_key.offset, chunk_end);
11207                 err |= REFERENCER_MISSING;
11208         } else{
11209                 leaf = path.nodes[0];
11210                 bi = btrfs_item_ptr(leaf, path.slots[0],
11211                                     struct btrfs_block_group_item);
11212                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11213                                    sizeof(bg_item));
11214                 if (btrfs_block_group_flags(&bg_item) != type) {
11215                         error(
11216 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11217                                 chunk_key.offset, chunk_end, type,
11218                                 btrfs_block_group_flags(&bg_item));
11219                         err |= REFERENCER_MISSING;
11220                 }
11221         }
11222
11223         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11224         for (i = 0; i < num_stripes; i++) {
11225                 btrfs_release_path(&path);
11226                 btrfs_init_path(&path);
11227                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11228                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11229                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11230
11231                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11232                                         0, 0);
11233                 if (ret)
11234                         goto not_match_dev;
11235
11236                 leaf = path.nodes[0];
11237                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11238                                      struct btrfs_dev_extent);
11239                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11240                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11241                 if (objectid != chunk_key.objectid ||
11242                     offset != chunk_key.offset ||
11243                     btrfs_dev_extent_length(leaf, ptr) != length)
11244                         goto not_match_dev;
11245                 continue;
11246 not_match_dev:
11247                 err |= BACKREF_MISSING;
11248                 error(
11249                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11250                         chunk_key.objectid, chunk_end, i);
11251                 continue;
11252         }
11253         btrfs_release_path(&path);
11254 out:
11255         return err;
11256 }
11257
11258 /*
11259  * Main entry function to check known items and update related accounting info
11260  */
11261 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11262 {
11263         struct btrfs_fs_info *fs_info = root->fs_info;
11264         struct btrfs_key key;
11265         int slot = 0;
11266         int type;
11267         struct btrfs_extent_data_ref *dref;
11268         int ret;
11269         int err = 0;
11270
11271 next:
11272         btrfs_item_key_to_cpu(eb, &key, slot);
11273         type = key.type;
11274
11275         switch (type) {
11276         case BTRFS_EXTENT_DATA_KEY:
11277                 ret = check_extent_data_item(root, eb, slot);
11278                 err |= ret;
11279                 break;
11280         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11281                 ret = check_block_group_item(fs_info, eb, slot);
11282                 err |= ret;
11283                 break;
11284         case BTRFS_DEV_ITEM_KEY:
11285                 ret = check_dev_item(fs_info, eb, slot);
11286                 err |= ret;
11287                 break;
11288         case BTRFS_CHUNK_ITEM_KEY:
11289                 ret = check_chunk_item(fs_info, eb, slot);
11290                 err |= ret;
11291                 break;
11292         case BTRFS_DEV_EXTENT_KEY:
11293                 ret = check_dev_extent_item(fs_info, eb, slot);
11294                 err |= ret;
11295                 break;
11296         case BTRFS_EXTENT_ITEM_KEY:
11297         case BTRFS_METADATA_ITEM_KEY:
11298                 ret = check_extent_item(fs_info, eb, slot);
11299                 err |= ret;
11300                 break;
11301         case BTRFS_EXTENT_CSUM_KEY:
11302                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11303                 break;
11304         case BTRFS_TREE_BLOCK_REF_KEY:
11305                 ret = check_tree_block_backref(fs_info, key.offset,
11306                                                key.objectid, -1);
11307                 err |= ret;
11308                 break;
11309         case BTRFS_EXTENT_DATA_REF_KEY:
11310                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11311                 ret = check_extent_data_backref(fs_info,
11312                                 btrfs_extent_data_ref_root(eb, dref),
11313                                 btrfs_extent_data_ref_objectid(eb, dref),
11314                                 btrfs_extent_data_ref_offset(eb, dref),
11315                                 key.objectid, 0,
11316                                 btrfs_extent_data_ref_count(eb, dref));
11317                 err |= ret;
11318                 break;
11319         case BTRFS_SHARED_BLOCK_REF_KEY:
11320                 ret = check_shared_block_backref(fs_info, key.offset,
11321                                                  key.objectid, -1);
11322                 err |= ret;
11323                 break;
11324         case BTRFS_SHARED_DATA_REF_KEY:
11325                 ret = check_shared_data_backref(fs_info, key.offset,
11326                                                 key.objectid);
11327                 err |= ret;
11328                 break;
11329         default:
11330                 break;
11331         }
11332
11333         if (++slot < btrfs_header_nritems(eb))
11334                 goto next;
11335
11336         return err;
11337 }
11338
11339 /*
11340  * Helper function for later fs/subvol tree check.  To determine if a tree
11341  * block should be checked.
11342  * This function will ensure only the direct referencer with lowest rootid to
11343  * check a fs/subvolume tree block.
11344  *
11345  * Backref check at extent tree would detect errors like missing subvolume
11346  * tree, so we can do aggressive check to reduce duplicated checks.
11347  */
11348 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11349 {
11350         struct btrfs_root *extent_root = root->fs_info->extent_root;
11351         struct btrfs_key key;
11352         struct btrfs_path path;
11353         struct extent_buffer *leaf;
11354         int slot;
11355         struct btrfs_extent_item *ei;
11356         unsigned long ptr;
11357         unsigned long end;
11358         int type;
11359         u32 item_size;
11360         u64 offset;
11361         struct btrfs_extent_inline_ref *iref;
11362         int ret;
11363
11364         btrfs_init_path(&path);
11365         key.objectid = btrfs_header_bytenr(eb);
11366         key.type = BTRFS_METADATA_ITEM_KEY;
11367         key.offset = (u64)-1;
11368
11369         /*
11370          * Any failure in backref resolving means we can't determine
11371          * whom the tree block belongs to.
11372          * So in that case, we need to check that tree block
11373          */
11374         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11375         if (ret < 0)
11376                 goto need_check;
11377
11378         ret = btrfs_previous_extent_item(extent_root, &path,
11379                                          btrfs_header_bytenr(eb));
11380         if (ret)
11381                 goto need_check;
11382
11383         leaf = path.nodes[0];
11384         slot = path.slots[0];
11385         btrfs_item_key_to_cpu(leaf, &key, slot);
11386         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11387
11388         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11389                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11390         } else {
11391                 struct btrfs_tree_block_info *info;
11392
11393                 info = (struct btrfs_tree_block_info *)(ei + 1);
11394                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11395         }
11396
11397         item_size = btrfs_item_size_nr(leaf, slot);
11398         ptr = (unsigned long)iref;
11399         end = (unsigned long)ei + item_size;
11400         while (ptr < end) {
11401                 iref = (struct btrfs_extent_inline_ref *)ptr;
11402                 type = btrfs_extent_inline_ref_type(leaf, iref);
11403                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11404
11405                 /*
11406                  * We only check the tree block if current root is
11407                  * the lowest referencer of it.
11408                  */
11409                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11410                     offset < root->objectid) {
11411                         btrfs_release_path(&path);
11412                         return 0;
11413                 }
11414
11415                 ptr += btrfs_extent_inline_ref_size(type);
11416         }
11417         /*
11418          * Normally we should also check keyed tree block ref, but that may be
11419          * very time consuming.  Inlined ref should already make us skip a lot
11420          * of refs now.  So skip search keyed tree block ref.
11421          */
11422
11423 need_check:
11424         btrfs_release_path(&path);
11425         return 1;
11426 }
11427
11428 /*
11429  * Traversal function for tree block. We will do:
11430  * 1) Skip shared fs/subvolume tree blocks
11431  * 2) Update related bytes accounting
11432  * 3) Pre-order traversal
11433  */
11434 static int traverse_tree_block(struct btrfs_root *root,
11435                                 struct extent_buffer *node)
11436 {
11437         struct extent_buffer *eb;
11438         struct btrfs_key key;
11439         struct btrfs_key drop_key;
11440         int level;
11441         u64 nr;
11442         int i;
11443         int err = 0;
11444         int ret;
11445
11446         /*
11447          * Skip shared fs/subvolume tree block, in that case they will
11448          * be checked by referencer with lowest rootid
11449          */
11450         if (is_fstree(root->objectid) && !should_check(root, node))
11451                 return 0;
11452
11453         /* Update bytes accounting */
11454         total_btree_bytes += node->len;
11455         if (fs_root_objectid(btrfs_header_owner(node)))
11456                 total_fs_tree_bytes += node->len;
11457         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11458                 total_extent_tree_bytes += node->len;
11459         if (!found_old_backref &&
11460             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11461             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11462             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11463                 found_old_backref = 1;
11464
11465         /* pre-order tranversal, check itself first */
11466         level = btrfs_header_level(node);
11467         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11468                                    btrfs_header_level(node),
11469                                    btrfs_header_owner(node));
11470         err |= ret;
11471         if (err)
11472                 error(
11473         "check %s failed root %llu bytenr %llu level %d, force continue check",
11474                         level ? "node":"leaf", root->objectid,
11475                         btrfs_header_bytenr(node), btrfs_header_level(node));
11476
11477         if (!level) {
11478                 btree_space_waste += btrfs_leaf_free_space(root, node);
11479                 ret = check_leaf_items(root, node);
11480                 err |= ret;
11481                 return err;
11482         }
11483
11484         nr = btrfs_header_nritems(node);
11485         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11486         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11487                 sizeof(struct btrfs_key_ptr);
11488
11489         /* Then check all its children */
11490         for (i = 0; i < nr; i++) {
11491                 u64 blocknr = btrfs_node_blockptr(node, i);
11492
11493                 btrfs_node_key_to_cpu(node, &key, i);
11494                 if (level == root->root_item.drop_level &&
11495                     is_dropped_key(&key, &drop_key))
11496                         continue;
11497
11498                 /*
11499                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11500                  * to call the function itself.
11501                  */
11502                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11503                 if (extent_buffer_uptodate(eb)) {
11504                         ret = traverse_tree_block(root, eb);
11505                         err |= ret;
11506                 }
11507                 free_extent_buffer(eb);
11508         }
11509
11510         return err;
11511 }
11512
11513 /*
11514  * Low memory usage version check_chunks_and_extents.
11515  */
11516 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11517 {
11518         struct btrfs_path path;
11519         struct btrfs_key key;
11520         struct btrfs_root *root1;
11521         struct btrfs_root *cur_root;
11522         int err = 0;
11523         int ret;
11524
11525         root1 = root->fs_info->chunk_root;
11526         ret = traverse_tree_block(root1, root1->node);
11527         err |= ret;
11528
11529         root1 = root->fs_info->tree_root;
11530         ret = traverse_tree_block(root1, root1->node);
11531         err |= ret;
11532
11533         btrfs_init_path(&path);
11534         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11535         key.offset = 0;
11536         key.type = BTRFS_ROOT_ITEM_KEY;
11537
11538         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11539         if (ret) {
11540                 error("cannot find extent treet in tree_root");
11541                 goto out;
11542         }
11543
11544         while (1) {
11545                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11546                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11547                         goto next;
11548                 key.offset = (u64)-1;
11549
11550                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11551                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11552                                         &key);
11553                 else
11554                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11555                 if (IS_ERR(cur_root) || !cur_root) {
11556                         error("failed to read tree: %lld", key.objectid);
11557                         goto next;
11558                 }
11559
11560                 ret = traverse_tree_block(cur_root, cur_root->node);
11561                 err |= ret;
11562
11563                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11564                         btrfs_free_fs_root(cur_root);
11565 next:
11566                 ret = btrfs_next_item(root1, &path);
11567                 if (ret)
11568                         goto out;
11569         }
11570
11571 out:
11572         btrfs_release_path(&path);
11573         return err;
11574 }
11575
11576 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11577                            struct btrfs_root *root, int overwrite)
11578 {
11579         struct extent_buffer *c;
11580         struct extent_buffer *old = root->node;
11581         int level;
11582         int ret;
11583         struct btrfs_disk_key disk_key = {0,0,0};
11584
11585         level = 0;
11586
11587         if (overwrite) {
11588                 c = old;
11589                 extent_buffer_get(c);
11590                 goto init;
11591         }
11592         c = btrfs_alloc_free_block(trans, root,
11593                                    root->nodesize,
11594                                    root->root_key.objectid,
11595                                    &disk_key, level, 0, 0);
11596         if (IS_ERR(c)) {
11597                 c = old;
11598                 extent_buffer_get(c);
11599                 overwrite = 1;
11600         }
11601 init:
11602         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11603         btrfs_set_header_level(c, level);
11604         btrfs_set_header_bytenr(c, c->start);
11605         btrfs_set_header_generation(c, trans->transid);
11606         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11607         btrfs_set_header_owner(c, root->root_key.objectid);
11608
11609         write_extent_buffer(c, root->fs_info->fsid,
11610                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11611
11612         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11613                             btrfs_header_chunk_tree_uuid(c),
11614                             BTRFS_UUID_SIZE);
11615
11616         btrfs_mark_buffer_dirty(c);
11617         /*
11618          * this case can happen in the following case:
11619          *
11620          * 1.overwrite previous root.
11621          *
11622          * 2.reinit reloc data root, this is because we skip pin
11623          * down reloc data tree before which means we can allocate
11624          * same block bytenr here.
11625          */
11626         if (old->start == c->start) {
11627                 btrfs_set_root_generation(&root->root_item,
11628                                           trans->transid);
11629                 root->root_item.level = btrfs_header_level(root->node);
11630                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11631                                         &root->root_key, &root->root_item);
11632                 if (ret) {
11633                         free_extent_buffer(c);
11634                         return ret;
11635                 }
11636         }
11637         free_extent_buffer(old);
11638         root->node = c;
11639         add_root_to_dirty_list(root);
11640         return 0;
11641 }
11642
11643 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11644                                 struct extent_buffer *eb, int tree_root)
11645 {
11646         struct extent_buffer *tmp;
11647         struct btrfs_root_item *ri;
11648         struct btrfs_key key;
11649         u64 bytenr;
11650         u32 nodesize;
11651         int level = btrfs_header_level(eb);
11652         int nritems;
11653         int ret;
11654         int i;
11655
11656         /*
11657          * If we have pinned this block before, don't pin it again.
11658          * This can not only avoid forever loop with broken filesystem
11659          * but also give us some speedups.
11660          */
11661         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11662                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11663                 return 0;
11664
11665         btrfs_pin_extent(fs_info, eb->start, eb->len);
11666
11667         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11668         nritems = btrfs_header_nritems(eb);
11669         for (i = 0; i < nritems; i++) {
11670                 if (level == 0) {
11671                         btrfs_item_key_to_cpu(eb, &key, i);
11672                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11673                                 continue;
11674                         /* Skip the extent root and reloc roots */
11675                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11676                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11677                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11678                                 continue;
11679                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11680                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11681
11682                         /*
11683                          * If at any point we start needing the real root we
11684                          * will have to build a stump root for the root we are
11685                          * in, but for now this doesn't actually use the root so
11686                          * just pass in extent_root.
11687                          */
11688                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11689                                               nodesize, 0);
11690                         if (!extent_buffer_uptodate(tmp)) {
11691                                 fprintf(stderr, "Error reading root block\n");
11692                                 return -EIO;
11693                         }
11694                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11695                         free_extent_buffer(tmp);
11696                         if (ret)
11697                                 return ret;
11698                 } else {
11699                         bytenr = btrfs_node_blockptr(eb, i);
11700
11701                         /* If we aren't the tree root don't read the block */
11702                         if (level == 1 && !tree_root) {
11703                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11704                                 continue;
11705                         }
11706
11707                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11708                                               nodesize, 0);
11709                         if (!extent_buffer_uptodate(tmp)) {
11710                                 fprintf(stderr, "Error reading tree block\n");
11711                                 return -EIO;
11712                         }
11713                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11714                         free_extent_buffer(tmp);
11715                         if (ret)
11716                                 return ret;
11717                 }
11718         }
11719
11720         return 0;
11721 }
11722
11723 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11724 {
11725         int ret;
11726
11727         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11728         if (ret)
11729                 return ret;
11730
11731         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11732 }
11733
11734 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11735 {
11736         struct btrfs_block_group_cache *cache;
11737         struct btrfs_path path;
11738         struct extent_buffer *leaf;
11739         struct btrfs_chunk *chunk;
11740         struct btrfs_key key;
11741         int ret;
11742         u64 start;
11743
11744         btrfs_init_path(&path);
11745         key.objectid = 0;
11746         key.type = BTRFS_CHUNK_ITEM_KEY;
11747         key.offset = 0;
11748         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11749         if (ret < 0) {
11750                 btrfs_release_path(&path);
11751                 return ret;
11752         }
11753
11754         /*
11755          * We do this in case the block groups were screwed up and had alloc
11756          * bits that aren't actually set on the chunks.  This happens with
11757          * restored images every time and could happen in real life I guess.
11758          */
11759         fs_info->avail_data_alloc_bits = 0;
11760         fs_info->avail_metadata_alloc_bits = 0;
11761         fs_info->avail_system_alloc_bits = 0;
11762
11763         /* First we need to create the in-memory block groups */
11764         while (1) {
11765                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11766                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11767                         if (ret < 0) {
11768                                 btrfs_release_path(&path);
11769                                 return ret;
11770                         }
11771                         if (ret) {
11772                                 ret = 0;
11773                                 break;
11774                         }
11775                 }
11776                 leaf = path.nodes[0];
11777                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11778                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11779                         path.slots[0]++;
11780                         continue;
11781                 }
11782
11783                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11784                 btrfs_add_block_group(fs_info, 0,
11785                                       btrfs_chunk_type(leaf, chunk),
11786                                       key.objectid, key.offset,
11787                                       btrfs_chunk_length(leaf, chunk));
11788                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11789                                  key.offset + btrfs_chunk_length(leaf, chunk));
11790                 path.slots[0]++;
11791         }
11792         start = 0;
11793         while (1) {
11794                 cache = btrfs_lookup_first_block_group(fs_info, start);
11795                 if (!cache)
11796                         break;
11797                 cache->cached = 1;
11798                 start = cache->key.objectid + cache->key.offset;
11799         }
11800
11801         btrfs_release_path(&path);
11802         return 0;
11803 }
11804
11805 static int reset_balance(struct btrfs_trans_handle *trans,
11806                          struct btrfs_fs_info *fs_info)
11807 {
11808         struct btrfs_root *root = fs_info->tree_root;
11809         struct btrfs_path path;
11810         struct extent_buffer *leaf;
11811         struct btrfs_key key;
11812         int del_slot, del_nr = 0;
11813         int ret;
11814         int found = 0;
11815
11816         btrfs_init_path(&path);
11817         key.objectid = BTRFS_BALANCE_OBJECTID;
11818         key.type = BTRFS_BALANCE_ITEM_KEY;
11819         key.offset = 0;
11820         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11821         if (ret) {
11822                 if (ret > 0)
11823                         ret = 0;
11824                 if (!ret)
11825                         goto reinit_data_reloc;
11826                 else
11827                         goto out;
11828         }
11829
11830         ret = btrfs_del_item(trans, root, &path);
11831         if (ret)
11832                 goto out;
11833         btrfs_release_path(&path);
11834
11835         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11836         key.type = BTRFS_ROOT_ITEM_KEY;
11837         key.offset = 0;
11838         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11839         if (ret < 0)
11840                 goto out;
11841         while (1) {
11842                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11843                         if (!found)
11844                                 break;
11845
11846                         if (del_nr) {
11847                                 ret = btrfs_del_items(trans, root, &path,
11848                                                       del_slot, del_nr);
11849                                 del_nr = 0;
11850                                 if (ret)
11851                                         goto out;
11852                         }
11853                         key.offset++;
11854                         btrfs_release_path(&path);
11855
11856                         found = 0;
11857                         ret = btrfs_search_slot(trans, root, &key, &path,
11858                                                 -1, 1);
11859                         if (ret < 0)
11860                                 goto out;
11861                         continue;
11862                 }
11863                 found = 1;
11864                 leaf = path.nodes[0];
11865                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11866                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11867                         break;
11868                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11869                         path.slots[0]++;
11870                         continue;
11871                 }
11872                 if (!del_nr) {
11873                         del_slot = path.slots[0];
11874                         del_nr = 1;
11875                 } else {
11876                         del_nr++;
11877                 }
11878                 path.slots[0]++;
11879         }
11880
11881         if (del_nr) {
11882                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11883                 if (ret)
11884                         goto out;
11885         }
11886         btrfs_release_path(&path);
11887
11888 reinit_data_reloc:
11889         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11890         key.type = BTRFS_ROOT_ITEM_KEY;
11891         key.offset = (u64)-1;
11892         root = btrfs_read_fs_root(fs_info, &key);
11893         if (IS_ERR(root)) {
11894                 fprintf(stderr, "Error reading data reloc tree\n");
11895                 ret = PTR_ERR(root);
11896                 goto out;
11897         }
11898         record_root_in_trans(trans, root);
11899         ret = btrfs_fsck_reinit_root(trans, root, 0);
11900         if (ret)
11901                 goto out;
11902         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11903 out:
11904         btrfs_release_path(&path);
11905         return ret;
11906 }
11907
11908 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11909                               struct btrfs_fs_info *fs_info)
11910 {
11911         u64 start = 0;
11912         int ret;
11913
11914         /*
11915          * The only reason we don't do this is because right now we're just
11916          * walking the trees we find and pinning down their bytes, we don't look
11917          * at any of the leaves.  In order to do mixed groups we'd have to check
11918          * the leaves of any fs roots and pin down the bytes for any file
11919          * extents we find.  Not hard but why do it if we don't have to?
11920          */
11921         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11922                 fprintf(stderr, "We don't support re-initing the extent tree "
11923                         "for mixed block groups yet, please notify a btrfs "
11924                         "developer you want to do this so they can add this "
11925                         "functionality.\n");
11926                 return -EINVAL;
11927         }
11928
11929         /*
11930          * first we need to walk all of the trees except the extent tree and pin
11931          * down the bytes that are in use so we don't overwrite any existing
11932          * metadata.
11933          */
11934         ret = pin_metadata_blocks(fs_info);
11935         if (ret) {
11936                 fprintf(stderr, "error pinning down used bytes\n");
11937                 return ret;
11938         }
11939
11940         /*
11941          * Need to drop all the block groups since we're going to recreate all
11942          * of them again.
11943          */
11944         btrfs_free_block_groups(fs_info);
11945         ret = reset_block_groups(fs_info);
11946         if (ret) {
11947                 fprintf(stderr, "error resetting the block groups\n");
11948                 return ret;
11949         }
11950
11951         /* Ok we can allocate now, reinit the extent root */
11952         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11953         if (ret) {
11954                 fprintf(stderr, "extent root initialization failed\n");
11955                 /*
11956                  * When the transaction code is updated we should end the
11957                  * transaction, but for now progs only knows about commit so
11958                  * just return an error.
11959                  */
11960                 return ret;
11961         }
11962
11963         /*
11964          * Now we have all the in-memory block groups setup so we can make
11965          * allocations properly, and the metadata we care about is safe since we
11966          * pinned all of it above.
11967          */
11968         while (1) {
11969                 struct btrfs_block_group_cache *cache;
11970
11971                 cache = btrfs_lookup_first_block_group(fs_info, start);
11972                 if (!cache)
11973                         break;
11974                 start = cache->key.objectid + cache->key.offset;
11975                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11976                                         &cache->key, &cache->item,
11977                                         sizeof(cache->item));
11978                 if (ret) {
11979                         fprintf(stderr, "Error adding block group\n");
11980                         return ret;
11981                 }
11982                 btrfs_extent_post_op(trans, fs_info->extent_root);
11983         }
11984
11985         ret = reset_balance(trans, fs_info);
11986         if (ret)
11987                 fprintf(stderr, "error resetting the pending balance\n");
11988
11989         return ret;
11990 }
11991
11992 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11993 {
11994         struct btrfs_path path;
11995         struct btrfs_trans_handle *trans;
11996         struct btrfs_key key;
11997         int ret;
11998
11999         printf("Recowing metadata block %llu\n", eb->start);
12000         key.objectid = btrfs_header_owner(eb);
12001         key.type = BTRFS_ROOT_ITEM_KEY;
12002         key.offset = (u64)-1;
12003
12004         root = btrfs_read_fs_root(root->fs_info, &key);
12005         if (IS_ERR(root)) {
12006                 fprintf(stderr, "Couldn't find owner root %llu\n",
12007                         key.objectid);
12008                 return PTR_ERR(root);
12009         }
12010
12011         trans = btrfs_start_transaction(root, 1);
12012         if (IS_ERR(trans))
12013                 return PTR_ERR(trans);
12014
12015         btrfs_init_path(&path);
12016         path.lowest_level = btrfs_header_level(eb);
12017         if (path.lowest_level)
12018                 btrfs_node_key_to_cpu(eb, &key, 0);
12019         else
12020                 btrfs_item_key_to_cpu(eb, &key, 0);
12021
12022         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12023         btrfs_commit_transaction(trans, root);
12024         btrfs_release_path(&path);
12025         return ret;
12026 }
12027
12028 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12029 {
12030         struct btrfs_path path;
12031         struct btrfs_trans_handle *trans;
12032         struct btrfs_key key;
12033         int ret;
12034
12035         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12036                bad->key.type, bad->key.offset);
12037         key.objectid = bad->root_id;
12038         key.type = BTRFS_ROOT_ITEM_KEY;
12039         key.offset = (u64)-1;
12040
12041         root = btrfs_read_fs_root(root->fs_info, &key);
12042         if (IS_ERR(root)) {
12043                 fprintf(stderr, "Couldn't find owner root %llu\n",
12044                         key.objectid);
12045                 return PTR_ERR(root);
12046         }
12047
12048         trans = btrfs_start_transaction(root, 1);
12049         if (IS_ERR(trans))
12050                 return PTR_ERR(trans);
12051
12052         btrfs_init_path(&path);
12053         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12054         if (ret) {
12055                 if (ret > 0)
12056                         ret = 0;
12057                 goto out;
12058         }
12059         ret = btrfs_del_item(trans, root, &path);
12060 out:
12061         btrfs_commit_transaction(trans, root);
12062         btrfs_release_path(&path);
12063         return ret;
12064 }
12065
12066 static int zero_log_tree(struct btrfs_root *root)
12067 {
12068         struct btrfs_trans_handle *trans;
12069         int ret;
12070
12071         trans = btrfs_start_transaction(root, 1);
12072         if (IS_ERR(trans)) {
12073                 ret = PTR_ERR(trans);
12074                 return ret;
12075         }
12076         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12077         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12078         ret = btrfs_commit_transaction(trans, root);
12079         return ret;
12080 }
12081
12082 static int populate_csum(struct btrfs_trans_handle *trans,
12083                          struct btrfs_root *csum_root, char *buf, u64 start,
12084                          u64 len)
12085 {
12086         u64 offset = 0;
12087         u64 sectorsize;
12088         int ret = 0;
12089
12090         while (offset < len) {
12091                 sectorsize = csum_root->sectorsize;
12092                 ret = read_extent_data(csum_root, buf, start + offset,
12093                                        &sectorsize, 0);
12094                 if (ret)
12095                         break;
12096                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12097                                             start + offset, buf, sectorsize);
12098                 if (ret)
12099                         break;
12100                 offset += sectorsize;
12101         }
12102         return ret;
12103 }
12104
12105 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12106                                       struct btrfs_root *csum_root,
12107                                       struct btrfs_root *cur_root)
12108 {
12109         struct btrfs_path path;
12110         struct btrfs_key key;
12111         struct extent_buffer *node;
12112         struct btrfs_file_extent_item *fi;
12113         char *buf = NULL;
12114         u64 start = 0;
12115         u64 len = 0;
12116         int slot = 0;
12117         int ret = 0;
12118
12119         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12120         if (!buf)
12121                 return -ENOMEM;
12122
12123         btrfs_init_path(&path);
12124         key.objectid = 0;
12125         key.offset = 0;
12126         key.type = 0;
12127         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12128         if (ret < 0)
12129                 goto out;
12130         /* Iterate all regular file extents and fill its csum */
12131         while (1) {
12132                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12133
12134                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12135                         goto next;
12136                 node = path.nodes[0];
12137                 slot = path.slots[0];
12138                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12139                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12140                         goto next;
12141                 start = btrfs_file_extent_disk_bytenr(node, fi);
12142                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12143
12144                 ret = populate_csum(trans, csum_root, buf, start, len);
12145                 if (ret == -EEXIST)
12146                         ret = 0;
12147                 if (ret < 0)
12148                         goto out;
12149 next:
12150                 /*
12151                  * TODO: if next leaf is corrupted, jump to nearest next valid
12152                  * leaf.
12153                  */
12154                 ret = btrfs_next_item(cur_root, &path);
12155                 if (ret < 0)
12156                         goto out;
12157                 if (ret > 0) {
12158                         ret = 0;
12159                         goto out;
12160                 }
12161         }
12162
12163 out:
12164         btrfs_release_path(&path);
12165         free(buf);
12166         return ret;
12167 }
12168
12169 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12170                                   struct btrfs_root *csum_root)
12171 {
12172         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12173         struct btrfs_path path;
12174         struct btrfs_root *tree_root = fs_info->tree_root;
12175         struct btrfs_root *cur_root;
12176         struct extent_buffer *node;
12177         struct btrfs_key key;
12178         int slot = 0;
12179         int ret = 0;
12180
12181         btrfs_init_path(&path);
12182         key.objectid = BTRFS_FS_TREE_OBJECTID;
12183         key.offset = 0;
12184         key.type = BTRFS_ROOT_ITEM_KEY;
12185         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12186         if (ret < 0)
12187                 goto out;
12188         if (ret > 0) {
12189                 ret = -ENOENT;
12190                 goto out;
12191         }
12192
12193         while (1) {
12194                 node = path.nodes[0];
12195                 slot = path.slots[0];
12196                 btrfs_item_key_to_cpu(node, &key, slot);
12197                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12198                         goto out;
12199                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12200                         goto next;
12201                 if (!is_fstree(key.objectid))
12202                         goto next;
12203                 key.offset = (u64)-1;
12204
12205                 cur_root = btrfs_read_fs_root(fs_info, &key);
12206                 if (IS_ERR(cur_root) || !cur_root) {
12207                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12208                                 key.objectid);
12209                         goto out;
12210                 }
12211                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12212                                 cur_root);
12213                 if (ret < 0)
12214                         goto out;
12215 next:
12216                 ret = btrfs_next_item(tree_root, &path);
12217                 if (ret > 0) {
12218                         ret = 0;
12219                         goto out;
12220                 }
12221                 if (ret < 0)
12222                         goto out;
12223         }
12224
12225 out:
12226         btrfs_release_path(&path);
12227         return ret;
12228 }
12229
12230 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12231                                       struct btrfs_root *csum_root)
12232 {
12233         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12234         struct btrfs_path path;
12235         struct btrfs_extent_item *ei;
12236         struct extent_buffer *leaf;
12237         char *buf;
12238         struct btrfs_key key;
12239         int ret;
12240
12241         btrfs_init_path(&path);
12242         key.objectid = 0;
12243         key.type = BTRFS_EXTENT_ITEM_KEY;
12244         key.offset = 0;
12245         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12246         if (ret < 0) {
12247                 btrfs_release_path(&path);
12248                 return ret;
12249         }
12250
12251         buf = malloc(csum_root->sectorsize);
12252         if (!buf) {
12253                 btrfs_release_path(&path);
12254                 return -ENOMEM;
12255         }
12256
12257         while (1) {
12258                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12259                         ret = btrfs_next_leaf(extent_root, &path);
12260                         if (ret < 0)
12261                                 break;
12262                         if (ret) {
12263                                 ret = 0;
12264                                 break;
12265                         }
12266                 }
12267                 leaf = path.nodes[0];
12268
12269                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12270                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12271                         path.slots[0]++;
12272                         continue;
12273                 }
12274
12275                 ei = btrfs_item_ptr(leaf, path.slots[0],
12276                                     struct btrfs_extent_item);
12277                 if (!(btrfs_extent_flags(leaf, ei) &
12278                       BTRFS_EXTENT_FLAG_DATA)) {
12279                         path.slots[0]++;
12280                         continue;
12281                 }
12282
12283                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12284                                     key.offset);
12285                 if (ret)
12286                         break;
12287                 path.slots[0]++;
12288         }
12289
12290         btrfs_release_path(&path);
12291         free(buf);
12292         return ret;
12293 }
12294
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree re-initialization wipes all extent info, so in that case the
 * extent tree cannot be used as the source; the fs/subvolume trees are used
 * instead.  A non-zero @search_fs_tree selects the fs/subvolume trees,
 * otherwise the extent tree is walked.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
12311
12312 static void free_roots_info_cache(void)
12313 {
12314         if (!roots_info_cache)
12315                 return;
12316
12317         while (!cache_tree_empty(roots_info_cache)) {
12318                 struct cache_extent *entry;
12319                 struct root_item_info *rii;
12320
12321                 entry = first_cache_extent(roots_info_cache);
12322                 if (!entry)
12323                         break;
12324                 remove_cache_extent(roots_info_cache, entry);
12325                 rii = container_of(entry, struct root_item_info, cache_extent);
12326                 free(rii);
12327         }
12328
12329         free(roots_info_cache);
12330         roots_info_cache = NULL;
12331 }
12332
12333 static int build_roots_info_cache(struct btrfs_fs_info *info)
12334 {
12335         int ret = 0;
12336         struct btrfs_key key;
12337         struct extent_buffer *leaf;
12338         struct btrfs_path path;
12339
12340         if (!roots_info_cache) {
12341                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12342                 if (!roots_info_cache)
12343                         return -ENOMEM;
12344                 cache_tree_init(roots_info_cache);
12345         }
12346
12347         btrfs_init_path(&path);
12348         key.objectid = 0;
12349         key.type = BTRFS_EXTENT_ITEM_KEY;
12350         key.offset = 0;
12351         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12352         if (ret < 0)
12353                 goto out;
12354         leaf = path.nodes[0];
12355
12356         while (1) {
12357                 struct btrfs_key found_key;
12358                 struct btrfs_extent_item *ei;
12359                 struct btrfs_extent_inline_ref *iref;
12360                 int slot = path.slots[0];
12361                 int type;
12362                 u64 flags;
12363                 u64 root_id;
12364                 u8 level;
12365                 struct cache_extent *entry;
12366                 struct root_item_info *rii;
12367
12368                 if (slot >= btrfs_header_nritems(leaf)) {
12369                         ret = btrfs_next_leaf(info->extent_root, &path);
12370                         if (ret < 0) {
12371                                 break;
12372                         } else if (ret) {
12373                                 ret = 0;
12374                                 break;
12375                         }
12376                         leaf = path.nodes[0];
12377                         slot = path.slots[0];
12378                 }
12379
12380                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12381
12382                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12383                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12384                         goto next;
12385
12386                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12387                 flags = btrfs_extent_flags(leaf, ei);
12388
12389                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12390                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12391                         goto next;
12392
12393                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12394                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12395                         level = found_key.offset;
12396                 } else {
12397                         struct btrfs_tree_block_info *binfo;
12398
12399                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12400                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12401                         level = btrfs_tree_block_level(leaf, binfo);
12402                 }
12403
12404                 /*
12405                  * For a root extent, it must be of the following type and the
12406                  * first (and only one) iref in the item.
12407                  */
12408                 type = btrfs_extent_inline_ref_type(leaf, iref);
12409                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12410                         goto next;
12411
12412                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12413                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12414                 if (!entry) {
12415                         rii = malloc(sizeof(struct root_item_info));
12416                         if (!rii) {
12417                                 ret = -ENOMEM;
12418                                 goto out;
12419                         }
12420                         rii->cache_extent.start = root_id;
12421                         rii->cache_extent.size = 1;
12422                         rii->level = (u8)-1;
12423                         entry = &rii->cache_extent;
12424                         ret = insert_cache_extent(roots_info_cache, entry);
12425                         ASSERT(ret == 0);
12426                 } else {
12427                         rii = container_of(entry, struct root_item_info,
12428                                            cache_extent);
12429                 }
12430
12431                 ASSERT(rii->cache_extent.start == root_id);
12432                 ASSERT(rii->cache_extent.size == 1);
12433
12434                 if (level > rii->level || rii->level == (u8)-1) {
12435                         rii->level = level;
12436                         rii->bytenr = found_key.objectid;
12437                         rii->gen = btrfs_extent_generation(leaf, ei);
12438                         rii->node_count = 1;
12439                 } else if (level == rii->level) {
12440                         rii->node_count++;
12441                 }
12442 next:
12443                 path.slots[0]++;
12444         }
12445
12446 out:
12447         btrfs_release_path(&path);
12448
12449         return ret;
12450 }
12451
12452 static int maybe_repair_root_item(struct btrfs_path *path,
12453                                   const struct btrfs_key *root_key,
12454                                   const int read_only_mode)
12455 {
12456         const u64 root_id = root_key->objectid;
12457         struct cache_extent *entry;
12458         struct root_item_info *rii;
12459         struct btrfs_root_item ri;
12460         unsigned long offset;
12461
12462         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12463         if (!entry) {
12464                 fprintf(stderr,
12465                         "Error: could not find extent items for root %llu\n",
12466                         root_key->objectid);
12467                 return -ENOENT;
12468         }
12469
12470         rii = container_of(entry, struct root_item_info, cache_extent);
12471         ASSERT(rii->cache_extent.start == root_id);
12472         ASSERT(rii->cache_extent.size == 1);
12473
12474         if (rii->node_count != 1) {
12475                 fprintf(stderr,
12476                         "Error: could not find btree root extent for root %llu\n",
12477                         root_id);
12478                 return -ENOENT;
12479         }
12480
12481         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12482         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12483
12484         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12485             btrfs_root_level(&ri) != rii->level ||
12486             btrfs_root_generation(&ri) != rii->gen) {
12487
12488                 /*
12489                  * If we're in repair mode but our caller told us to not update
12490                  * the root item, i.e. just check if it needs to be updated, don't
12491                  * print this message, since the caller will call us again shortly
12492                  * for the same root item without read only mode (the caller will
12493                  * open a transaction first).
12494                  */
12495                 if (!(read_only_mode && repair))
12496                         fprintf(stderr,
12497                                 "%sroot item for root %llu,"
12498                                 " current bytenr %llu, current gen %llu, current level %u,"
12499                                 " new bytenr %llu, new gen %llu, new level %u\n",
12500                                 (read_only_mode ? "" : "fixing "),
12501                                 root_id,
12502                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12503                                 btrfs_root_level(&ri),
12504                                 rii->bytenr, rii->gen, rii->level);
12505
12506                 if (btrfs_root_generation(&ri) > rii->gen) {
12507                         fprintf(stderr,
12508                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12509                                 root_id, btrfs_root_generation(&ri), rii->gen);
12510                         return -EINVAL;
12511                 }
12512
12513                 if (!read_only_mode) {
12514                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12515                         btrfs_set_root_level(&ri, rii->level);
12516                         btrfs_set_root_generation(&ri, rii->gen);
12517                         write_extent_buffer(path->nodes[0], &ri,
12518                                             offset, sizeof(ri));
12519                 }
12520
12521                 return 1;
12522         }
12523
12524         return 0;
12525 }
12526
12527 /*
12528  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12529  * caused read-only snapshots to be corrupted if they were created at a moment
12530  * when the source subvolume/snapshot had orphan items. The issue was that the
12531  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12532  * node instead of the post orphan cleanup root node.
12533  * So this function, and its callees, just detects and fixes those cases. Even
12534  * though the regression was for read-only snapshots, this function applies to
12535  * any snapshot/subvolume root.
12536  * This must be run before any other repair code - not doing it so, makes other
12537  * repair code delete or modify backrefs in the extent tree for example, which
12538  * will result in an inconsistent fs after repairing the root items.
12539  */
12540 static int repair_root_items(struct btrfs_fs_info *info)
12541 {
12542         struct btrfs_path path;
12543         struct btrfs_key key;
12544         struct extent_buffer *leaf;
12545         struct btrfs_trans_handle *trans = NULL;
12546         int ret = 0;
12547         int bad_roots = 0;
12548         int need_trans = 0;
12549
12550         btrfs_init_path(&path);
12551
12552         ret = build_roots_info_cache(info);
12553         if (ret)
12554                 goto out;
12555
12556         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12557         key.type = BTRFS_ROOT_ITEM_KEY;
12558         key.offset = 0;
12559
12560 again:
12561         /*
12562          * Avoid opening and committing transactions if a leaf doesn't have
12563          * any root items that need to be fixed, so that we avoid rotating
12564          * backup roots unnecessarily.
12565          */
12566         if (need_trans) {
12567                 trans = btrfs_start_transaction(info->tree_root, 1);
12568                 if (IS_ERR(trans)) {
12569                         ret = PTR_ERR(trans);
12570                         goto out;
12571                 }
12572         }
12573
12574         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12575                                 0, trans ? 1 : 0);
12576         if (ret < 0)
12577                 goto out;
12578         leaf = path.nodes[0];
12579
12580         while (1) {
12581                 struct btrfs_key found_key;
12582
12583                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12584                         int no_more_keys = find_next_key(&path, &key);
12585
12586                         btrfs_release_path(&path);
12587                         if (trans) {
12588                                 ret = btrfs_commit_transaction(trans,
12589                                                                info->tree_root);
12590                                 trans = NULL;
12591                                 if (ret < 0)
12592                                         goto out;
12593                         }
12594                         need_trans = 0;
12595                         if (no_more_keys)
12596                                 break;
12597                         goto again;
12598                 }
12599
12600                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12601
12602                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12603                         goto next;
12604                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12605                         goto next;
12606
12607                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12608                 if (ret < 0)
12609                         goto out;
12610                 if (ret) {
12611                         if (!trans && repair) {
12612                                 need_trans = 1;
12613                                 key = found_key;
12614                                 btrfs_release_path(&path);
12615                                 goto again;
12616                         }
12617                         bad_roots++;
12618                 }
12619 next:
12620                 path.slots[0]++;
12621         }
12622         ret = 0;
12623 out:
12624         free_roots_info_cache();
12625         btrfs_release_path(&path);
12626         if (trans)
12627                 btrfs_commit_transaction(trans, info->tree_root);
12628         if (ret < 0)
12629                 return ret;
12630
12631         return bad_roots;
12632 }
12633
12634 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12635 {
12636         struct btrfs_trans_handle *trans;
12637         struct btrfs_block_group_cache *bg_cache;
12638         u64 current = 0;
12639         int ret = 0;
12640
12641         /* Clear all free space cache inodes and its extent data */
12642         while (1) {
12643                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12644                 if (!bg_cache)
12645                         break;
12646                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12647                 if (ret < 0)
12648                         return ret;
12649                 current = bg_cache->key.objectid + bg_cache->key.offset;
12650         }
12651
12652         /* Don't forget to set cache_generation to -1 */
12653         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12654         if (IS_ERR(trans)) {
12655                 error("failed to update super block cache generation");
12656                 return PTR_ERR(trans);
12657         }
12658         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12659         btrfs_commit_transaction(trans, fs_info->tree_root);
12660
12661         return ret;
12662 }
12663
/*
 * NULL-terminated list of help text lines for "btrfs check": the command
 * synopsis and description, an empty separator line, then one or more lines
 * per supported option.
 */
const char *const cmd_check_usage[] = {
        /* Synopsis and description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",

        NULL
};
12694
12695 int cmd_check(int argc, char **argv)
12696 {
12697         struct cache_tree root_cache;
12698         struct btrfs_root *root;
12699         struct btrfs_fs_info *info;
12700         u64 bytenr = 0;
12701         u64 subvolid = 0;
12702         u64 tree_root_bytenr = 0;
12703         u64 chunk_root_bytenr = 0;
12704         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12705         int ret;
12706         int err = 0;
12707         u64 num;
12708         int init_csum_tree = 0;
12709         int readonly = 0;
12710         int clear_space_cache = 0;
12711         int qgroup_report = 0;
12712         int qgroups_repaired = 0;
12713         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12714
12715         while(1) {
12716                 int c;
12717                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12718                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12719                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12720                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12721                 static const struct option long_options[] = {
12722                         { "super", required_argument, NULL, 's' },
12723                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12724                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12725                         { "init-csum-tree", no_argument, NULL,
12726                                 GETOPT_VAL_INIT_CSUM },
12727                         { "init-extent-tree", no_argument, NULL,
12728                                 GETOPT_VAL_INIT_EXTENT },
12729                         { "check-data-csum", no_argument, NULL,
12730                                 GETOPT_VAL_CHECK_CSUM },
12731                         { "backup", no_argument, NULL, 'b' },
12732                         { "subvol-extents", required_argument, NULL, 'E' },
12733                         { "qgroup-report", no_argument, NULL, 'Q' },
12734                         { "tree-root", required_argument, NULL, 'r' },
12735                         { "chunk-root", required_argument, NULL,
12736                                 GETOPT_VAL_CHUNK_TREE },
12737                         { "progress", no_argument, NULL, 'p' },
12738                         { "mode", required_argument, NULL,
12739                                 GETOPT_VAL_MODE },
12740                         { "clear-space-cache", required_argument, NULL,
12741                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12742                         { NULL, 0, NULL, 0}
12743                 };
12744
12745                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12746                 if (c < 0)
12747                         break;
12748                 switch(c) {
12749                         case 'a': /* ignored */ break;
12750                         case 'b':
12751                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12752                                 break;
12753                         case 's':
12754                                 num = arg_strtou64(optarg);
12755                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12756                                         error(
12757                                         "super mirror should be less than %d",
12758                                                 BTRFS_SUPER_MIRROR_MAX);
12759                                         exit(1);
12760                                 }
12761                                 bytenr = btrfs_sb_offset(((int)num));
12762                                 printf("using SB copy %llu, bytenr %llu\n", num,
12763                                        (unsigned long long)bytenr);
12764                                 break;
12765                         case 'Q':
12766                                 qgroup_report = 1;
12767                                 break;
12768                         case 'E':
12769                                 subvolid = arg_strtou64(optarg);
12770                                 break;
12771                         case 'r':
12772                                 tree_root_bytenr = arg_strtou64(optarg);
12773                                 break;
12774                         case GETOPT_VAL_CHUNK_TREE:
12775                                 chunk_root_bytenr = arg_strtou64(optarg);
12776                                 break;
12777                         case 'p':
12778                                 ctx.progress_enabled = true;
12779                                 break;
12780                         case '?':
12781                         case 'h':
12782                                 usage(cmd_check_usage);
12783                         case GETOPT_VAL_REPAIR:
12784                                 printf("enabling repair mode\n");
12785                                 repair = 1;
12786                                 ctree_flags |= OPEN_CTREE_WRITES;
12787                                 break;
12788                         case GETOPT_VAL_READONLY:
12789                                 readonly = 1;
12790                                 break;
12791                         case GETOPT_VAL_INIT_CSUM:
12792                                 printf("Creating a new CRC tree\n");
12793                                 init_csum_tree = 1;
12794                                 repair = 1;
12795                                 ctree_flags |= OPEN_CTREE_WRITES;
12796                                 break;
12797                         case GETOPT_VAL_INIT_EXTENT:
12798                                 init_extent_tree = 1;
12799                                 ctree_flags |= (OPEN_CTREE_WRITES |
12800                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12801                                 repair = 1;
12802                                 break;
12803                         case GETOPT_VAL_CHECK_CSUM:
12804                                 check_data_csum = 1;
12805                                 break;
12806                         case GETOPT_VAL_MODE:
12807                                 check_mode = parse_check_mode(optarg);
12808                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12809                                         error("unknown mode: %s", optarg);
12810                                         exit(1);
12811                                 }
12812                                 break;
12813                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12814                                 if (strcmp(optarg, "v1") == 0) {
12815                                         clear_space_cache = 1;
12816                                 } else if (strcmp(optarg, "v2") == 0) {
12817                                         clear_space_cache = 2;
12818                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12819                                 } else {
12820                                         error(
12821                 "invalid argument to --clear-space-cache, must be v1 or v2");
12822                                         exit(1);
12823                                 }
12824                                 ctree_flags |= OPEN_CTREE_WRITES;
12825                                 break;
12826                 }
12827         }
12828
12829         if (check_argc_exact(argc - optind, 1))
12830                 usage(cmd_check_usage);
12831
12832         if (ctx.progress_enabled) {
12833                 ctx.tp = TASK_NOTHING;
12834                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12835         }
12836
12837         /* This check is the only reason for --readonly to exist */
12838         if (readonly && repair) {
12839                 error("repair options are not compatible with --readonly");
12840                 exit(1);
12841         }
12842
12843         /*
12844          * Not supported yet
12845          */
12846         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12847                 error("low memory mode doesn't support repair yet");
12848                 exit(1);
12849         }
12850
12851         radix_tree_init();
12852         cache_tree_init(&root_cache);
12853
12854         if((ret = check_mounted(argv[optind])) < 0) {
12855                 error("could not check mount status: %s", strerror(-ret));
12856                 err |= !!ret;
12857                 goto err_out;
12858         } else if(ret) {
12859                 error("%s is currently mounted, aborting", argv[optind]);
12860                 ret = -EBUSY;
12861                 err |= !!ret;
12862                 goto err_out;
12863         }
12864
12865         /* only allow partial opening under repair mode */
12866         if (repair)
12867                 ctree_flags |= OPEN_CTREE_PARTIAL;
12868
12869         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12870                                   chunk_root_bytenr, ctree_flags);
12871         if (!info) {
12872                 error("cannot open file system");
12873                 ret = -EIO;
12874                 err |= !!ret;
12875                 goto err_out;
12876         }
12877
12878         global_info = info;
12879         root = info->fs_root;
12880         if (clear_space_cache == 1) {
12881                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12882                         error(
12883                 "free space cache v2 detected, use --clear-space-cache v2");
12884                         ret = 1;
12885                         goto close_out;
12886                 }
12887                 printf("Clearing free space cache\n");
12888                 ret = clear_free_space_cache(info);
12889                 if (ret) {
12890                         error("failed to clear free space cache");
12891                         ret = 1;
12892                 } else {
12893                         printf("Free space cache cleared\n");
12894                 }
12895                 goto close_out;
12896         } else if (clear_space_cache == 2) {
12897                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12898                         printf("no free space cache v2 to clear\n");
12899                         ret = 0;
12900                         goto close_out;
12901                 }
12902                 printf("Clear free space cache v2\n");
12903                 ret = btrfs_clear_free_space_tree(info);
12904                 if (ret) {
12905                         error("failed to clear free space cache v2: %d", ret);
12906                         ret = 1;
12907                 } else {
12908                         printf("free space cache v2 cleared\n");
12909                 }
12910                 goto close_out;
12911         }
12912
12913         /*
12914          * repair mode will force us to commit transaction which
12915          * will make us fail to load log tree when mounting.
12916          */
12917         if (repair && btrfs_super_log_root(info->super_copy)) {
12918                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12919                 if (!ret) {
12920                         ret = 1;
12921                         err |= !!ret;
12922                         goto close_out;
12923                 }
12924                 ret = zero_log_tree(root);
12925                 err |= !!ret;
12926                 if (ret) {
12927                         error("failed to zero log tree: %d", ret);
12928                         goto close_out;
12929                 }
12930         }
12931
12932         uuid_unparse(info->super_copy->fsid, uuidbuf);
12933         if (qgroup_report) {
12934                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12935                        uuidbuf);
12936                 ret = qgroup_verify_all(info);
12937                 err |= !!ret;
12938                 if (ret == 0)
12939                         report_qgroups(1);
12940                 goto close_out;
12941         }
12942         if (subvolid) {
12943                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12944                        subvolid, argv[optind], uuidbuf);
12945                 ret = print_extent_state(info, subvolid);
12946                 err |= !!ret;
12947                 goto close_out;
12948         }
12949         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12950
12951         if (!extent_buffer_uptodate(info->tree_root->node) ||
12952             !extent_buffer_uptodate(info->dev_root->node) ||
12953             !extent_buffer_uptodate(info->chunk_root->node)) {
12954                 error("critical roots corrupted, unable to check the filesystem");
12955                 err |= !!ret;
12956                 ret = -EIO;
12957                 goto close_out;
12958         }
12959
12960         if (init_extent_tree || init_csum_tree) {
12961                 struct btrfs_trans_handle *trans;
12962
12963                 trans = btrfs_start_transaction(info->extent_root, 0);
12964                 if (IS_ERR(trans)) {
12965                         error("error starting transaction");
12966                         ret = PTR_ERR(trans);
12967                         err |= !!ret;
12968                         goto close_out;
12969                 }
12970
12971                 if (init_extent_tree) {
12972                         printf("Creating a new extent tree\n");
12973                         ret = reinit_extent_tree(trans, info);
12974                         err |= !!ret;
12975                         if (ret)
12976                                 goto close_out;
12977                 }
12978
12979                 if (init_csum_tree) {
12980                         printf("Reinitialize checksum tree\n");
12981                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12982                         if (ret) {
12983                                 error("checksum tree initialization failed: %d",
12984                                                 ret);
12985                                 ret = -EIO;
12986                                 err |= !!ret;
12987                                 goto close_out;
12988                         }
12989
12990                         ret = fill_csum_tree(trans, info->csum_root,
12991                                              init_extent_tree);
12992                         err |= !!ret;
12993                         if (ret) {
12994                                 error("checksum tree refilling failed: %d", ret);
12995                                 return -EIO;
12996                         }
12997                 }
12998                 /*
12999                  * Ok now we commit and run the normal fsck, which will add
13000                  * extent entries for all of the items it finds.
13001                  */
13002                 ret = btrfs_commit_transaction(trans, info->extent_root);
13003                 err |= !!ret;
13004                 if (ret)
13005                         goto close_out;
13006         }
13007         if (!extent_buffer_uptodate(info->extent_root->node)) {
13008                 error("critical: extent_root, unable to check the filesystem");
13009                 ret = -EIO;
13010                 err |= !!ret;
13011                 goto close_out;
13012         }
13013         if (!extent_buffer_uptodate(info->csum_root->node)) {
13014                 error("critical: csum_root, unable to check the filesystem");
13015                 ret = -EIO;
13016                 err |= !!ret;
13017                 goto close_out;
13018         }
13019
13020         if (!ctx.progress_enabled)
13021                 fprintf(stderr, "checking extents\n");
13022         if (check_mode == CHECK_MODE_LOWMEM)
13023                 ret = check_chunks_and_extents_v2(root);
13024         else
13025                 ret = check_chunks_and_extents(root);
13026         err |= !!ret;
13027         if (ret)
13028                 error(
13029                 "errors found in extent allocation tree or chunk allocation");
13030
13031         ret = repair_root_items(info);
13032         err |= !!ret;
13033         if (ret < 0) {
13034                 error("failed to repair root items: %s", strerror(-ret));
13035                 goto close_out;
13036         }
13037         if (repair) {
13038                 fprintf(stderr, "Fixed %d roots.\n", ret);
13039                 ret = 0;
13040         } else if (ret > 0) {
13041                 fprintf(stderr,
13042                        "Found %d roots with an outdated root item.\n",
13043                        ret);
13044                 fprintf(stderr,
13045                         "Please run a filesystem check with the option --repair to fix them.\n");
13046                 ret = 1;
13047                 err |= !!ret;
13048                 goto close_out;
13049         }
13050
13051         if (!ctx.progress_enabled) {
13052                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13053                         fprintf(stderr, "checking free space tree\n");
13054                 else
13055                         fprintf(stderr, "checking free space cache\n");
13056         }
13057         ret = check_space_cache(root);
13058         err |= !!ret;
13059         if (ret) {
13060                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13061                         error("errors found in free space tree");
13062                 else
13063                         error("errors found in free space cache");
13064                 goto out;
13065         }
13066
13067         /*
13068          * We used to have to have these hole extents in between our real
13069          * extents so if we don't have this flag set we need to make sure there
13070          * are no gaps in the file extents for inodes, otherwise we can just
13071          * ignore it when this happens.
13072          */
13073         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13074         if (!ctx.progress_enabled)
13075                 fprintf(stderr, "checking fs roots\n");
13076         if (check_mode == CHECK_MODE_LOWMEM)
13077                 ret = check_fs_roots_v2(root->fs_info);
13078         else
13079                 ret = check_fs_roots(root, &root_cache);
13080         err |= !!ret;
13081         if (ret) {
13082                 error("errors found in fs roots");
13083                 goto out;
13084         }
13085
13086         fprintf(stderr, "checking csums\n");
13087         ret = check_csums(root);
13088         err |= !!ret;
13089         if (ret) {
13090                 error("errors found in csum tree");
13091                 goto out;
13092         }
13093
13094         fprintf(stderr, "checking root refs\n");
13095         /* For low memory mode, check_fs_roots_v2 handles root refs */
13096         if (check_mode != CHECK_MODE_LOWMEM) {
13097                 ret = check_root_refs(root, &root_cache);
13098                 err |= !!ret;
13099                 if (ret) {
13100                         error("errors found in root refs");
13101                         goto out;
13102                 }
13103         }
13104
13105         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13106                 struct extent_buffer *eb;
13107
13108                 eb = list_first_entry(&root->fs_info->recow_ebs,
13109                                       struct extent_buffer, recow);
13110                 list_del_init(&eb->recow);
13111                 ret = recow_extent_buffer(root, eb);
13112                 err |= !!ret;
13113                 if (ret) {
13114                         error("fails to fix transid errors");
13115                         break;
13116                 }
13117         }
13118
13119         while (!list_empty(&delete_items)) {
13120                 struct bad_item *bad;
13121
13122                 bad = list_first_entry(&delete_items, struct bad_item, list);
13123                 list_del_init(&bad->list);
13124                 if (repair) {
13125                         ret = delete_bad_item(root, bad);
13126                         err |= !!ret;
13127                 }
13128                 free(bad);
13129         }
13130
13131         if (info->quota_enabled) {
13132                 fprintf(stderr, "checking quota groups\n");
13133                 ret = qgroup_verify_all(info);
13134                 err |= !!ret;
13135                 if (ret) {
13136                         error("failed to check quota groups");
13137                         goto out;
13138                 }
13139                 report_qgroups(0);
13140                 ret = repair_qgroups(info, &qgroups_repaired);
13141                 err |= !!ret;
13142                 if (err) {
13143                         error("failed to repair quota groups");
13144                         goto out;
13145                 }
13146                 ret = 0;
13147         }
13148
13149         if (!list_empty(&root->fs_info->recow_ebs)) {
13150                 error("transid errors in file system");
13151                 ret = 1;
13152                 err |= !!ret;
13153         }
13154 out:
13155         if (found_old_backref) { /*
13156                  * there was a disk format change when mixed
13157                  * backref was in testing tree. The old format
13158                  * existed about one week.
13159                  */
13160                 printf("\n * Found old mixed backref format. "
13161                        "The old format is not supported! *"
13162                        "\n * Please mount the FS in readonly mode, "
13163                        "backup data and re-format the FS. *\n\n");
13164                 err |= 1;
13165         }
13166         printf("found %llu bytes used, ",
13167                (unsigned long long)bytes_used);
13168         if (err)
13169                 printf("error(s) found\n");
13170         else
13171                 printf("no error found\n");
13172         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13173         printf("total tree bytes: %llu\n",
13174                (unsigned long long)total_btree_bytes);
13175         printf("total fs tree bytes: %llu\n",
13176                (unsigned long long)total_fs_tree_bytes);
13177         printf("total extent tree bytes: %llu\n",
13178                (unsigned long long)total_extent_tree_bytes);
13179         printf("btree space waste bytes: %llu\n",
13180                (unsigned long long)btree_space_waste);
13181         printf("file data blocks allocated: %llu\n referenced %llu\n",
13182                 (unsigned long long)data_bytes_allocated,
13183                 (unsigned long long)data_bytes_referenced);
13184
13185         free_qgroup_counts();
13186         free_root_recs_tree(&root_cache);
13187 close_out:
13188         close_ctree(root);
13189 err_out:
13190         if (ctx.progress_enabled)
13191                 task_deinit(ctx.info);
13192
13193         return err;
13194 }