7a973e32205de2c5d23aa2d6c2ea411628c95371
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases reported by the progress indicator.  Every value except
 * TASK_NOTHING is used as an index into the label table of
 * print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* sentinel, must remain the last element */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode currently in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits, one per flag; numbering starts at bit 1 (bit 0 is unused). */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Backref error bits; print_ref_error() turns them into text. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Inode error bits; print_inode_error() turns them into text. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits; they are only used
 * internally for error classification.  Every flag owns a distinct bit so
 * that an OR-ed result stays unambiguous.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Used to be (1 << 4), which made it indistinguishable from
 * REFERENCER_MISMATCH; moved to the first free bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p:	unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize,
833                                          root->fs_info->sectorsize));
834         }
835 }
836
837 static void print_ref_error(int errors)
838 {
839         if (errors & REF_ERR_NO_DIR_ITEM)
840                 fprintf(stderr, ", no dir item");
841         if (errors & REF_ERR_NO_DIR_INDEX)
842                 fprintf(stderr, ", no dir index");
843         if (errors & REF_ERR_NO_INODE_REF)
844                 fprintf(stderr, ", no inode ref");
845         if (errors & REF_ERR_DUP_DIR_ITEM)
846                 fprintf(stderr, ", dup dir item");
847         if (errors & REF_ERR_DUP_DIR_INDEX)
848                 fprintf(stderr, ", dup dir index");
849         if (errors & REF_ERR_DUP_INODE_REF)
850                 fprintf(stderr, ", dup inode ref");
851         if (errors & REF_ERR_INDEX_UNMATCH)
852                 fprintf(stderr, ", index mismatch");
853         if (errors & REF_ERR_FILETYPE_UNMATCH)
854                 fprintf(stderr, ", filetype mismatch");
855         if (errors & REF_ERR_NAME_TOO_LONG)
856                 fprintf(stderr, ", name too long");
857         if (errors & REF_ERR_NO_ROOT_REF)
858                 fprintf(stderr, ", no root ref");
859         if (errors & REF_ERR_NO_ROOT_BACKREF)
860                 fprintf(stderr, ", no root backref");
861         if (errors & REF_ERR_DUP_ROOT_REF)
862                 fprintf(stderr, ", dup root ref");
863         if (errors & REF_ERR_DUP_ROOT_BACKREF)
864                 fprintf(stderr, ", dup root backref");
865         fprintf(stderr, "\n");
866 }
867
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869                                           u64 ino, int mod)
870 {
871         struct ptr_node *node;
872         struct cache_extent *cache;
873         struct inode_record *rec = NULL;
874         int ret;
875
876         cache = lookup_cache_extent(inode_cache, ino, 1);
877         if (cache) {
878                 node = container_of(cache, struct ptr_node, cache);
879                 rec = node->data;
880                 if (mod && rec->refs > 1) {
881                         node->data = clone_inode_rec(rec);
882                         if (IS_ERR(node->data))
883                                 return node->data;
884                         rec->refs--;
885                         rec = node->data;
886                 }
887         } else if (mod) {
888                 rec = calloc(1, sizeof(*rec));
889                 if (!rec)
890                         return ERR_PTR(-ENOMEM);
891                 rec->ino = ino;
892                 rec->extent_start = (u64)-1;
893                 rec->refs = 1;
894                 INIT_LIST_HEAD(&rec->backrefs);
895                 INIT_LIST_HEAD(&rec->orphan_extents);
896                 rec->holes = RB_ROOT;
897
898                 node = malloc(sizeof(*node));
899                 if (!node) {
900                         free(rec);
901                         return ERR_PTR(-ENOMEM);
902                 }
903                 node->cache.start = ino;
904                 node->cache.size = 1;
905                 node->data = rec;
906
907                 if (ino == BTRFS_FREE_INO_OBJECTID)
908                         rec->found_link = 1;
909
910                 ret = insert_cache_extent(inode_cache, &node->cache);
911                 if (ret)
912                         return ERR_PTR(-EEXIST);
913         }
914         return rec;
915 }
916
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 {
919         struct orphan_data_extent *orphan;
920
921         while (!list_empty(orphan_extents)) {
922                 orphan = list_entry(orphan_extents->next,
923                                     struct orphan_data_extent, list);
924                 list_del(&orphan->list);
925                 free(orphan);
926         }
927 }
928
929 static void free_inode_rec(struct inode_record *rec)
930 {
931         struct inode_backref *backref;
932
933         if (--rec->refs > 0)
934                 return;
935
936         while (!list_empty(&rec->backrefs)) {
937                 backref = to_inode_backref(rec->backrefs.next);
938                 list_del(&backref->list);
939                 free(backref);
940         }
941         free_orphan_data_extents(&rec->orphan_extents);
942         free_file_extent_holes(&rec->holes);
943         free(rec);
944 }
945
946 static int can_free_inode_rec(struct inode_record *rec)
947 {
948         if (!rec->errors && rec->checked && rec->found_inode_item &&
949             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
950                 return 1;
951         return 0;
952 }
953
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955                                  struct inode_record *rec)
956 {
957         struct cache_extent *cache;
958         struct inode_backref *tmp, *backref;
959         struct ptr_node *node;
960         u8 filetype;
961
962         if (!rec->found_inode_item)
963                 return;
964
965         filetype = imode_to_type(rec->imode);
966         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967                 if (backref->found_dir_item && backref->found_dir_index) {
968                         if (backref->filetype != filetype)
969                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970                         if (!backref->errors && backref->found_inode_ref &&
971                             rec->nlink == rec->found_link) {
972                                 list_del(&backref->list);
973                                 free(backref);
974                         }
975                 }
976         }
977
978         if (!rec->checked || rec->merging)
979                 return;
980
981         if (S_ISDIR(rec->imode)) {
982                 if (rec->found_size != rec->isize)
983                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984                 if (rec->found_file_extent)
985                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
986         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987                 if (rec->found_dir_item)
988                         rec->errors |= I_ERR_ODD_DIR_ITEM;
989                 if (rec->found_size != rec->nbytes)
990                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991                 if (rec->nlink > 0 && !no_holes &&
992                     (rec->extent_end < rec->isize ||
993                      first_extent_gap(&rec->holes) < rec->isize))
994                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995         }
996
997         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998                 if (rec->found_csum_item && rec->nodatasum)
999                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000                 if (rec->some_csum_missing && !rec->nodatasum)
1001                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002         }
1003
1004         BUG_ON(rec->refs != 1);
1005         if (can_free_inode_rec(rec)) {
1006                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007                 node = container_of(cache, struct ptr_node, cache);
1008                 BUG_ON(node->data != rec);
1009                 remove_cache_extent(inode_cache, &node->cache);
1010                 free(node);
1011                 free_inode_rec(rec);
1012         }
1013 }
1014
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 {
1017         struct btrfs_path path;
1018         struct btrfs_key key;
1019         int ret;
1020
1021         key.objectid = BTRFS_ORPHAN_OBJECTID;
1022         key.type = BTRFS_ORPHAN_ITEM_KEY;
1023         key.offset = ino;
1024
1025         btrfs_init_path(&path);
1026         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027         btrfs_release_path(&path);
1028         if (ret > 0)
1029                 ret = -ENOENT;
1030         return ret;
1031 }
1032
1033 static int process_inode_item(struct extent_buffer *eb,
1034                               int slot, struct btrfs_key *key,
1035                               struct shared_node *active_node)
1036 {
1037         struct inode_record *rec;
1038         struct btrfs_inode_item *item;
1039
1040         rec = active_node->current;
1041         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042         if (rec->found_inode_item) {
1043                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044                 return 1;
1045         }
1046         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047         rec->nlink = btrfs_inode_nlink(eb, item);
1048         rec->isize = btrfs_inode_size(eb, item);
1049         rec->nbytes = btrfs_inode_nbytes(eb, item);
1050         rec->imode = btrfs_inode_mode(eb, item);
1051         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052                 rec->nodatasum = 1;
1053         rec->found_inode_item = 1;
1054         if (rec->nlink == 0)
1055                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056         maybe_free_inode_rec(&active_node->inode_cache, rec);
1057         return 0;
1058 }
1059
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061                                                 const char *name,
1062                                                 int namelen, u64 dir)
1063 {
1064         struct inode_backref *backref;
1065
1066         list_for_each_entry(backref, &rec->backrefs, list) {
1067                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068                         break;
1069                 if (backref->dir != dir || backref->namelen != namelen)
1070                         continue;
1071                 if (memcmp(name, backref->name, namelen))
1072                         continue;
1073                 return backref;
1074         }
1075
1076         backref = malloc(sizeof(*backref) + namelen + 1);
1077         if (!backref)
1078                 return NULL;
1079         memset(backref, 0, sizeof(*backref));
1080         backref->dir = dir;
1081         backref->namelen = namelen;
1082         memcpy(backref->name, name, namelen);
1083         backref->name[namelen] = '\0';
1084         list_add_tail(&backref->list, &rec->backrefs);
1085         return backref;
1086 }
1087
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089                              u64 ino, u64 dir, u64 index,
1090                              const char *name, int namelen,
1091                              u8 filetype, u8 itemtype, int errors)
1092 {
1093         struct inode_record *rec;
1094         struct inode_backref *backref;
1095
1096         rec = get_inode_rec(inode_cache, ino, 1);
1097         BUG_ON(IS_ERR(rec));
1098         backref = get_inode_backref(rec, name, namelen, dir);
1099         BUG_ON(!backref);
1100         if (errors)
1101                 backref->errors |= errors;
1102         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103                 if (backref->found_dir_index)
1104                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105                 if (backref->found_inode_ref && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 if (backref->found_dir_item && backref->filetype != filetype)
1108                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109
1110                 backref->index = index;
1111                 backref->filetype = filetype;
1112                 backref->found_dir_index = 1;
1113         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114                 rec->found_link++;
1115                 if (backref->found_dir_item)
1116                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117                 if (backref->found_dir_index && backref->filetype != filetype)
1118                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119
1120                 backref->filetype = filetype;
1121                 backref->found_dir_item = 1;
1122         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124                 if (backref->found_inode_ref)
1125                         backref->errors |= REF_ERR_DUP_INODE_REF;
1126                 if (backref->found_dir_index && backref->index != index)
1127                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1128                 else
1129                         backref->index = index;
1130
1131                 backref->ref_type = itemtype;
1132                 backref->found_inode_ref = 1;
1133         } else {
1134                 BUG_ON(1);
1135         }
1136
1137         maybe_free_inode_rec(inode_cache, rec);
1138         return 0;
1139 }
1140
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142                             struct cache_tree *dst_cache)
1143 {
1144         struct inode_backref *backref;
1145         u32 dir_count = 0;
1146         int ret = 0;
1147
1148         dst->merging = 1;
1149         list_for_each_entry(backref, &src->backrefs, list) {
1150                 if (backref->found_dir_index) {
1151                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1152                                         backref->index, backref->name,
1153                                         backref->namelen, backref->filetype,
1154                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1155                 }
1156                 if (backref->found_dir_item) {
1157                         dir_count++;
1158                         add_inode_backref(dst_cache, dst->ino,
1159                                         backref->dir, 0, backref->name,
1160                                         backref->namelen, backref->filetype,
1161                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1162                 }
1163                 if (backref->found_inode_ref) {
1164                         add_inode_backref(dst_cache, dst->ino,
1165                                         backref->dir, backref->index,
1166                                         backref->name, backref->namelen, 0,
1167                                         backref->ref_type, backref->errors);
1168                 }
1169         }
1170
1171         if (src->found_dir_item)
1172                 dst->found_dir_item = 1;
1173         if (src->found_file_extent)
1174                 dst->found_file_extent = 1;
1175         if (src->found_csum_item)
1176                 dst->found_csum_item = 1;
1177         if (src->some_csum_missing)
1178                 dst->some_csum_missing = 1;
1179         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1181                 if (ret < 0)
1182                         return ret;
1183         }
1184
1185         BUG_ON(src->found_link < dir_count);
1186         dst->found_link += src->found_link - dir_count;
1187         dst->found_size += src->found_size;
1188         if (src->extent_start != (u64)-1) {
1189                 if (dst->extent_start == (u64)-1) {
1190                         dst->extent_start = src->extent_start;
1191                         dst->extent_end = src->extent_end;
1192                 } else {
1193                         if (dst->extent_end > src->extent_start)
1194                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195                         else if (dst->extent_end < src->extent_start) {
1196                                 ret = add_file_extent_hole(&dst->holes,
1197                                         dst->extent_end,
1198                                         src->extent_start - dst->extent_end);
1199                         }
1200                         if (dst->extent_end < src->extent_end)
1201                                 dst->extent_end = src->extent_end;
1202                 }
1203         }
1204
1205         dst->errors |= src->errors;
1206         if (src->found_inode_item) {
1207                 if (!dst->found_inode_item) {
1208                         dst->nlink = src->nlink;
1209                         dst->isize = src->isize;
1210                         dst->nbytes = src->nbytes;
1211                         dst->imode = src->imode;
1212                         dst->nodatasum = src->nodatasum;
1213                         dst->found_inode_item = 1;
1214                 } else {
1215                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1216                 }
1217         }
1218         dst->merging = 0;
1219
1220         return 0;
1221 }
1222
1223 static int splice_shared_node(struct shared_node *src_node,
1224                               struct shared_node *dst_node)
1225 {
1226         struct cache_extent *cache;
1227         struct ptr_node *node, *ins;
1228         struct cache_tree *src, *dst;
1229         struct inode_record *rec, *conflict;
1230         u64 current_ino = 0;
1231         int splice = 0;
1232         int ret;
1233
1234         if (--src_node->refs == 0)
1235                 splice = 1;
1236         if (src_node->current)
1237                 current_ino = src_node->current->ino;
1238
1239         src = &src_node->root_cache;
1240         dst = &dst_node->root_cache;
1241 again:
1242         cache = search_cache_extent(src, 0);
1243         while (cache) {
1244                 node = container_of(cache, struct ptr_node, cache);
1245                 rec = node->data;
1246                 cache = next_cache_extent(cache);
1247
1248                 if (splice) {
1249                         remove_cache_extent(src, &node->cache);
1250                         ins = node;
1251                 } else {
1252                         ins = malloc(sizeof(*ins));
1253                         BUG_ON(!ins);
1254                         ins->cache.start = node->cache.start;
1255                         ins->cache.size = node->cache.size;
1256                         ins->data = rec;
1257                         rec->refs++;
1258                 }
1259                 ret = insert_cache_extent(dst, &ins->cache);
1260                 if (ret == -EEXIST) {
1261                         conflict = get_inode_rec(dst, rec->ino, 1);
1262                         BUG_ON(IS_ERR(conflict));
1263                         merge_inode_recs(rec, conflict, dst);
1264                         if (rec->checked) {
1265                                 conflict->checked = 1;
1266                                 if (dst_node->current == conflict)
1267                                         dst_node->current = NULL;
1268                         }
1269                         maybe_free_inode_rec(dst, conflict);
1270                         free_inode_rec(rec);
1271                         free(ins);
1272                 } else {
1273                         BUG_ON(ret);
1274                 }
1275         }
1276
1277         if (src == &src_node->root_cache) {
1278                 src = &src_node->inode_cache;
1279                 dst = &dst_node->inode_cache;
1280                 goto again;
1281         }
1282
1283         if (current_ino > 0 && (!dst_node->current ||
1284             current_ino > dst_node->current->ino)) {
1285                 if (dst_node->current) {
1286                         dst_node->current->checked = 1;
1287                         maybe_free_inode_rec(dst, dst_node->current);
1288                 }
1289                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290                 BUG_ON(IS_ERR(dst_node->current));
1291         }
1292         return 0;
1293 }
1294
1295 static void free_inode_ptr(struct cache_extent *cache)
1296 {
1297         struct ptr_node *node;
1298         struct inode_record *rec;
1299
1300         node = container_of(cache, struct ptr_node, cache);
1301         rec = node->data;
1302         free_inode_rec(rec);
1303         free(node);
1304 }
1305
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309                                             u64 bytenr)
1310 {
1311         struct cache_extent *cache;
1312         struct shared_node *node;
1313
1314         cache = lookup_cache_extent(shared, bytenr, 1);
1315         if (cache) {
1316                 node = container_of(cache, struct shared_node, cache);
1317                 return node;
1318         }
1319         return NULL;
1320 }
1321
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 {
1324         int ret;
1325         struct shared_node *node;
1326
1327         node = calloc(1, sizeof(*node));
1328         if (!node)
1329                 return -ENOMEM;
1330         node->cache.start = bytenr;
1331         node->cache.size = 1;
1332         cache_tree_init(&node->root_cache);
1333         cache_tree_init(&node->inode_cache);
1334         node->refs = refs;
1335
1336         ret = insert_cache_extent(shared, &node->cache);
1337
1338         return ret;
1339 }
1340
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342                              struct walk_control *wc, int level)
1343 {
1344         struct shared_node *node;
1345         struct shared_node *dest;
1346         int ret;
1347
1348         if (level == wc->active_node)
1349                 return 0;
1350
1351         BUG_ON(wc->active_node <= level);
1352         node = find_shared_node(&wc->shared, bytenr);
1353         if (!node) {
1354                 ret = add_shared_node(&wc->shared, bytenr, refs);
1355                 BUG_ON(ret);
1356                 node = find_shared_node(&wc->shared, bytenr);
1357                 wc->nodes[level] = node;
1358                 wc->active_node = level;
1359                 return 0;
1360         }
1361
1362         if (wc->root_level == wc->active_node &&
1363             btrfs_root_refs(&root->root_item) == 0) {
1364                 if (--node->refs == 0) {
1365                         free_inode_recs_tree(&node->root_cache);
1366                         free_inode_recs_tree(&node->inode_cache);
1367                         remove_cache_extent(&wc->shared, &node->cache);
1368                         free(node);
1369                 }
1370                 return 1;
1371         }
1372
1373         dest = wc->nodes[wc->active_node];
1374         splice_shared_node(node, dest);
1375         if (node->refs == 0) {
1376                 remove_cache_extent(&wc->shared, &node->cache);
1377                 free(node);
1378         }
1379         return 1;
1380 }
1381
1382 static int leave_shared_node(struct btrfs_root *root,
1383                              struct walk_control *wc, int level)
1384 {
1385         struct shared_node *node;
1386         struct shared_node *dest;
1387         int i;
1388
1389         if (level == wc->root_level)
1390                 return 0;
1391
1392         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1393                 if (wc->nodes[i])
1394                         break;
1395         }
1396         BUG_ON(i >= BTRFS_MAX_LEVEL);
1397
1398         node = wc->nodes[wc->active_node];
1399         wc->nodes[wc->active_node] = NULL;
1400         wc->active_node = i;
1401
1402         dest = wc->nodes[wc->active_node];
1403         if (wc->active_node < wc->root_level ||
1404             btrfs_root_refs(&root->root_item) > 0) {
1405                 BUG_ON(node->refs <= 1);
1406                 splice_shared_node(node, dest);
1407         } else {
1408                 BUG_ON(node->refs < 2);
1409                 node->refs--;
1410         }
1411         return 0;
1412 }
1413
1414 /*
1415  * Returns:
1416  * < 0 - on error
1417  * 1   - if the root with id child_root_id is a child of root parent_root_id
1418  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1419  *       has other root(s) as parent(s)
1420  * 2   - if the root child_root_id doesn't have any parent roots
1421  */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423                          u64 child_root_id)
1424 {
1425         struct btrfs_path path;
1426         struct btrfs_key key;
1427         struct extent_buffer *leaf;
1428         int has_parent = 0;
1429         int ret;
1430
1431         btrfs_init_path(&path);
1432
1433         key.objectid = parent_root_id;
1434         key.type = BTRFS_ROOT_REF_KEY;
1435         key.offset = child_root_id;
1436         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1437                                 0, 0);
1438         if (ret < 0)
1439                 return ret;
1440         btrfs_release_path(&path);
1441         if (!ret)
1442                 return 1;
1443
1444         key.objectid = child_root_id;
1445         key.type = BTRFS_ROOT_BACKREF_KEY;
1446         key.offset = 0;
1447         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1448                                 0, 0);
1449         if (ret < 0)
1450                 goto out;
1451
1452         while (1) {
1453                 leaf = path.nodes[0];
1454                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456                         if (ret)
1457                                 break;
1458                         leaf = path.nodes[0];
1459                 }
1460
1461                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462                 if (key.objectid != child_root_id ||
1463                     key.type != BTRFS_ROOT_BACKREF_KEY)
1464                         break;
1465
1466                 has_parent = 1;
1467
1468                 if (key.offset == parent_root_id) {
1469                         btrfs_release_path(&path);
1470                         return 1;
1471                 }
1472
1473                 path.slots[0]++;
1474         }
1475 out:
1476         btrfs_release_path(&path);
1477         if (ret < 0)
1478                 return ret;
1479         return has_parent ? 0 : 2;
1480 }
1481
1482 static int process_dir_item(struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (cur + sizeof(*di) + name_len > total ||
1517                     name_len > BTRFS_NAME_LEN) {
1518                         error = REF_ERR_NAME_TOO_LONG;
1519
1520                         if (cur + sizeof(*di) > total)
1521                                 break;
1522                         len = min_t(u32, total - cur - sizeof(*di),
1523                                     BTRFS_NAME_LEN);
1524                 } else {
1525                         len = name_len;
1526                         error = 0;
1527                 }
1528
1529                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530
1531                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1532                     key->offset != btrfs_name_hash(namebuf, len)) {
1533                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1534                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1535                         key->objectid, key->offset, namebuf, len, filetype,
1536                         key->offset, btrfs_name_hash(namebuf, len));
1537                 }
1538
1539                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1540                         add_inode_backref(inode_cache, location.objectid,
1541                                           key->objectid, key->offset, namebuf,
1542                                           len, filetype, key->type, error);
1543                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1544                         add_inode_backref(root_cache, location.objectid,
1545                                           key->objectid, key->offset,
1546                                           namebuf, len, filetype,
1547                                           key->type, error);
1548                 } else {
1549                         fprintf(stderr, "invalid location in dir item %u\n",
1550                                 location.type);
1551                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1552                                           key->objectid, key->offset, namebuf,
1553                                           len, filetype, key->type, error);
1554                 }
1555
1556                 len = sizeof(*di) + name_len + data_len;
1557                 di = (struct btrfs_dir_item *)((char *)di + len);
1558                 cur += len;
1559         }
1560         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1561                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1562
1563         return 0;
1564 }
1565
1566 static int process_inode_ref(struct extent_buffer *eb,
1567                              int slot, struct btrfs_key *key,
1568                              struct shared_node *active_node)
1569 {
1570         u32 total;
1571         u32 cur = 0;
1572         u32 len;
1573         u32 name_len;
1574         u64 index;
1575         int error;
1576         struct cache_tree *inode_cache;
1577         struct btrfs_inode_ref *ref;
1578         char namebuf[BTRFS_NAME_LEN];
1579
1580         inode_cache = &active_node->inode_cache;
1581
1582         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1583         total = btrfs_item_size_nr(eb, slot);
1584         while (cur < total) {
1585                 name_len = btrfs_inode_ref_name_len(eb, ref);
1586                 index = btrfs_inode_ref_index(eb, ref);
1587
1588                 /* inode_ref + namelen should not cross item boundary */
1589                 if (cur + sizeof(*ref) + name_len > total ||
1590                     name_len > BTRFS_NAME_LEN) {
1591                         if (total < cur + sizeof(*ref))
1592                                 break;
1593
1594                         /* Still try to read out the remaining part */
1595                         len = min_t(u32, total - cur - sizeof(*ref),
1596                                     BTRFS_NAME_LEN);
1597                         error = REF_ERR_NAME_TOO_LONG;
1598                 } else {
1599                         len = name_len;
1600                         error = 0;
1601                 }
1602
1603                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, key->offset,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*ref) + name_len;
1608                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612 }
1613
1614 static int process_inode_extref(struct extent_buffer *eb,
1615                                 int slot, struct btrfs_key *key,
1616                                 struct shared_node *active_node)
1617 {
1618         u32 total;
1619         u32 cur = 0;
1620         u32 len;
1621         u32 name_len;
1622         u64 index;
1623         u64 parent;
1624         int error;
1625         struct cache_tree *inode_cache;
1626         struct btrfs_inode_extref *extref;
1627         char namebuf[BTRFS_NAME_LEN];
1628
1629         inode_cache = &active_node->inode_cache;
1630
1631         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1632         total = btrfs_item_size_nr(eb, slot);
1633         while (cur < total) {
1634                 name_len = btrfs_inode_extref_name_len(eb, extref);
1635                 index = btrfs_inode_extref_index(eb, extref);
1636                 parent = btrfs_inode_extref_parent(eb, extref);
1637                 if (name_len <= BTRFS_NAME_LEN) {
1638                         len = name_len;
1639                         error = 0;
1640                 } else {
1641                         len = BTRFS_NAME_LEN;
1642                         error = REF_ERR_NAME_TOO_LONG;
1643                 }
1644                 read_extent_buffer(eb, namebuf,
1645                                    (unsigned long)(extref + 1), len);
1646                 add_inode_backref(inode_cache, key->objectid, parent,
1647                                   index, namebuf, len, 0, key->type, error);
1648
1649                 len = sizeof(*extref) + name_len;
1650                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1651                 cur += len;
1652         }
1653         return 0;
1654
1655 }
1656
1657 static int count_csum_range(struct btrfs_root *root, u64 start,
1658                             u64 len, u64 *found)
1659 {
1660         struct btrfs_key key;
1661         struct btrfs_path path;
1662         struct extent_buffer *leaf;
1663         int ret;
1664         size_t size;
1665         *found = 0;
1666         u64 csum_end;
1667         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1668
1669         btrfs_init_path(&path);
1670
1671         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1672         key.offset = start;
1673         key.type = BTRFS_EXTENT_CSUM_KEY;
1674
1675         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1676                                 &key, &path, 0, 0);
1677         if (ret < 0)
1678                 goto out;
1679         if (ret > 0 && path.slots[0] > 0) {
1680                 leaf = path.nodes[0];
1681                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1682                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1683                     key.type == BTRFS_EXTENT_CSUM_KEY)
1684                         path.slots[0]--;
1685         }
1686
1687         while (len > 0) {
1688                 leaf = path.nodes[0];
1689                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1690                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1691                         if (ret > 0)
1692                                 break;
1693                         else if (ret < 0)
1694                                 goto out;
1695                         leaf = path.nodes[0];
1696                 }
1697
1698                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1699                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1700                     key.type != BTRFS_EXTENT_CSUM_KEY)
1701                         break;
1702
1703                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1704                 if (key.offset >= start + len)
1705                         break;
1706
1707                 if (key.offset > start)
1708                         start = key.offset;
1709
1710                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1711                 csum_end = key.offset + (size / csum_size) *
1712                            root->fs_info->sectorsize;
1713                 if (csum_end > start) {
1714                         size = min(csum_end - start, len);
1715                         len -= size;
1716                         start += size;
1717                         *found += size;
1718                 }
1719
1720                 path.slots[0]++;
1721         }
1722 out:
1723         btrfs_release_path(&path);
1724         if (ret < 0)
1725                 return ret;
1726         return 0;
1727 }
1728
1729 static int process_file_extent(struct btrfs_root *root,
1730                                 struct extent_buffer *eb,
1731                                 int slot, struct btrfs_key *key,
1732                                 struct shared_node *active_node)
1733 {
1734         struct inode_record *rec;
1735         struct btrfs_file_extent_item *fi;
1736         u64 num_bytes = 0;
1737         u64 disk_bytenr = 0;
1738         u64 extent_offset = 0;
1739         u64 mask = root->fs_info->sectorsize - 1;
1740         int extent_type;
1741         int ret;
1742
1743         rec = active_node->current;
1744         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1745         rec->found_file_extent = 1;
1746
1747         if (rec->extent_start == (u64)-1) {
1748                 rec->extent_start = key->offset;
1749                 rec->extent_end = key->offset;
1750         }
1751
1752         if (rec->extent_end > key->offset)
1753                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1754         else if (rec->extent_end < key->offset) {
1755                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1756                                            key->offset - rec->extent_end);
1757                 if (ret < 0)
1758                         return ret;
1759         }
1760
1761         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1762         extent_type = btrfs_file_extent_type(eb, fi);
1763
1764         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1765                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1766                 if (num_bytes == 0)
1767                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1768                 rec->found_size += num_bytes;
1769                 num_bytes = (num_bytes + mask) & ~mask;
1770         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1771                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1772                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1773                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1774                 extent_offset = btrfs_file_extent_offset(eb, fi);
1775                 if (num_bytes == 0 || (num_bytes & mask))
1776                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777                 if (num_bytes + extent_offset >
1778                     btrfs_file_extent_ram_bytes(eb, fi))
1779                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1780                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1781                     (btrfs_file_extent_compression(eb, fi) ||
1782                      btrfs_file_extent_encryption(eb, fi) ||
1783                      btrfs_file_extent_other_encoding(eb, fi)))
1784                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1785                 if (disk_bytenr > 0)
1786                         rec->found_size += num_bytes;
1787         } else {
1788                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1789         }
1790         rec->extent_end = key->offset + num_bytes;
1791
1792         /*
1793          * The data reloc tree will copy full extents into its inode and then
1794          * copy the corresponding csums.  Because the extent it copied could be
1795          * a preallocated extent that hasn't been written to yet there may be no
1796          * csums to copy, ergo we won't have csums for our file extent.  This is
1797          * ok so just don't bother checking csums if the inode belongs to the
1798          * data reloc tree.
1799          */
1800         if (disk_bytenr > 0 &&
1801             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1802                 u64 found;
1803                 if (btrfs_file_extent_compression(eb, fi))
1804                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1805                 else
1806                         disk_bytenr += extent_offset;
1807
1808                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1809                 if (ret < 0)
1810                         return ret;
1811                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1812                         if (found > 0)
1813                                 rec->found_csum_item = 1;
1814                         if (found < num_bytes)
1815                                 rec->some_csum_missing = 1;
1816                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1817                         if (found > 0)
1818                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1819                 }
1820         }
1821         return 0;
1822 }
1823
1824 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1825                             struct walk_control *wc)
1826 {
1827         struct btrfs_key key;
1828         u32 nritems;
1829         int i;
1830         int ret = 0;
1831         struct cache_tree *inode_cache;
1832         struct shared_node *active_node;
1833
1834         if (wc->root_level == wc->active_node &&
1835             btrfs_root_refs(&root->root_item) == 0)
1836                 return 0;
1837
1838         active_node = wc->nodes[wc->active_node];
1839         inode_cache = &active_node->inode_cache;
1840         nritems = btrfs_header_nritems(eb);
1841         for (i = 0; i < nritems; i++) {
1842                 btrfs_item_key_to_cpu(eb, &key, i);
1843
1844                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1845                         continue;
1846                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1847                         continue;
1848
1849                 if (active_node->current == NULL ||
1850                     active_node->current->ino < key.objectid) {
1851                         if (active_node->current) {
1852                                 active_node->current->checked = 1;
1853                                 maybe_free_inode_rec(inode_cache,
1854                                                      active_node->current);
1855                         }
1856                         active_node->current = get_inode_rec(inode_cache,
1857                                                              key.objectid, 1);
1858                         BUG_ON(IS_ERR(active_node->current));
1859                 }
1860                 switch (key.type) {
1861                 case BTRFS_DIR_ITEM_KEY:
1862                 case BTRFS_DIR_INDEX_KEY:
1863                         ret = process_dir_item(eb, i, &key, active_node);
1864                         break;
1865                 case BTRFS_INODE_REF_KEY:
1866                         ret = process_inode_ref(eb, i, &key, active_node);
1867                         break;
1868                 case BTRFS_INODE_EXTREF_KEY:
1869                         ret = process_inode_extref(eb, i, &key, active_node);
1870                         break;
1871                 case BTRFS_INODE_ITEM_KEY:
1872                         ret = process_inode_item(eb, i, &key, active_node);
1873                         break;
1874                 case BTRFS_EXTENT_DATA_KEY:
1875                         ret = process_file_extent(root, eb, i, &key,
1876                                                   active_node);
1877                         break;
1878                 default:
1879                         break;
1880                 };
1881         }
1882         return ret;
1883 }
1884
1885 struct node_refs {
1886         u64 bytenr[BTRFS_MAX_LEVEL];
1887         u64 refs[BTRFS_MAX_LEVEL];
1888         int need_check[BTRFS_MAX_LEVEL];
1889 };
1890
1891 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1892                              struct node_refs *nrefs, u64 level);
1893 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1894                             unsigned int ext_ref);
1895
1896 /*
1897  * Returns >0  Found error, not fatal, should continue
1898  * Returns <0  Fatal error, must exit the whole check
1899  * Returns 0   No errors found
1900  */
1901 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1902                                struct node_refs *nrefs, int *level, int ext_ref)
1903 {
1904         struct extent_buffer *cur = path->nodes[0];
1905         struct btrfs_key key;
1906         u64 cur_bytenr;
1907         u32 nritems;
1908         u64 first_ino = 0;
1909         int root_level = btrfs_header_level(root->node);
1910         int i;
1911         int ret = 0; /* Final return value */
1912         int err = 0; /* Positive error bitmap */
1913
1914         cur_bytenr = cur->start;
1915
1916         /* skip to first inode item or the first inode number change */
1917         nritems = btrfs_header_nritems(cur);
1918         for (i = 0; i < nritems; i++) {
1919                 btrfs_item_key_to_cpu(cur, &key, i);
1920                 if (i == 0)
1921                         first_ino = key.objectid;
1922                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1923                     (first_ino && first_ino != key.objectid))
1924                         break;
1925         }
1926         if (i == nritems) {
1927                 path->slots[0] = nritems;
1928                 return 0;
1929         }
1930         path->slots[0] = i;
1931
1932 again:
1933         err |= check_inode_item(root, path, ext_ref);
1934
1935         if (err & LAST_ITEM)
1936                 goto out;
1937
1938         /* still have inode items in thie leaf */
1939         if (cur->start == cur_bytenr)
1940                 goto again;
1941
1942         /*
1943          * we have switched to another leaf, above nodes may
1944          * have changed, here walk down the path, if a node
1945          * or leaf is shared, check whether we can skip this
1946          * node or leaf.
1947          */
1948         for (i = root_level; i >= 0; i--) {
1949                 if (path->nodes[i]->start == nrefs->bytenr[i])
1950                         continue;
1951
1952                 ret = update_nodes_refs(root,
1953                                 path->nodes[i]->start,
1954                                 nrefs, i);
1955                 if (ret)
1956                         goto out;
1957
1958                 if (!nrefs->need_check[i]) {
1959                         *level += 1;
1960                         break;
1961                 }
1962         }
1963
1964         for (i = 0; i < *level; i++) {
1965                 free_extent_buffer(path->nodes[i]);
1966                 path->nodes[i] = NULL;
1967         }
1968 out:
1969         err &= ~LAST_ITEM;
1970         if (err && !ret)
1971                 ret = err;
1972         return ret;
1973 }
1974
1975 static void reada_walk_down(struct btrfs_root *root,
1976                             struct extent_buffer *node, int slot)
1977 {
1978         struct btrfs_fs_info *fs_info = root->fs_info;
1979         u64 bytenr;
1980         u64 ptr_gen;
1981         u32 nritems;
1982         int i;
1983         int level;
1984
1985         level = btrfs_header_level(node);
1986         if (level != 1)
1987                 return;
1988
1989         nritems = btrfs_header_nritems(node);
1990         for (i = slot; i < nritems; i++) {
1991                 bytenr = btrfs_node_blockptr(node, i);
1992                 ptr_gen = btrfs_node_ptr_generation(node, i);
1993                 readahead_tree_block(fs_info, bytenr, fs_info->nodesize,
1994                                 ptr_gen);
1995         }
1996 }
1997
1998 /*
1999  * Check the child node/leaf by the following condition:
2000  * 1. the first item key of the node/leaf should be the same with the one
2001  *    in parent.
2002  * 2. block in parent node should match the child node/leaf.
2003  * 3. generation of parent node and child's header should be consistent.
2004  *
2005  * Or the child node/leaf pointed by the key in parent is not valid.
2006  *
2007  * We hope to check leaf owner too, but since subvol may share leaves,
2008  * which makes leaf owner check not so strong, key check should be
2009  * sufficient enough for that case.
2010  */
2011 static int check_child_node(struct extent_buffer *parent, int slot,
2012                             struct extent_buffer *child)
2013 {
2014         struct btrfs_key parent_key;
2015         struct btrfs_key child_key;
2016         int ret = 0;
2017
2018         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2019         if (btrfs_header_level(child) == 0)
2020                 btrfs_item_key_to_cpu(child, &child_key, 0);
2021         else
2022                 btrfs_node_key_to_cpu(child, &child_key, 0);
2023
2024         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2025                 ret = -EINVAL;
2026                 fprintf(stderr,
2027                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2028                         parent_key.objectid, parent_key.type, parent_key.offset,
2029                         child_key.objectid, child_key.type, child_key.offset);
2030         }
2031         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2032                 ret = -EINVAL;
2033                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2034                         btrfs_node_blockptr(parent, slot),
2035                         btrfs_header_bytenr(child));
2036         }
2037         if (btrfs_node_ptr_generation(parent, slot) !=
2038             btrfs_header_generation(child)) {
2039                 ret = -EINVAL;
2040                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2041                         btrfs_header_generation(child),
2042                         btrfs_node_ptr_generation(parent, slot));
2043         }
2044         return ret;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2049  * in every fs or file tree check. Here we find its all root ids, and only check
2050  * it in the fs or file tree which has the smallest root id.
2051  */
2052 static int need_check(struct btrfs_root *root, struct ulist *roots)
2053 {
2054         struct rb_node *node;
2055         struct ulist_node *u;
2056
2057         if (roots->nnodes == 1)
2058                 return 1;
2059
2060         node = rb_first(&roots->root);
2061         u = rb_entry(node, struct ulist_node, rb_node);
2062         /*
2063          * current root id is not smallest, we skip it and let it be checked
2064          * in the fs or file tree who hash the smallest root id.
2065          */
2066         if (root->objectid != u->val)
2067                 return 0;
2068
2069         return 1;
2070 }
2071
2072 /*
2073  * for a tree node or leaf, we record its reference count, so later if we still
2074  * process this node or leaf, don't need to compute its reference count again.
2075  */
2076 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2077                              struct node_refs *nrefs, u64 level)
2078 {
2079         int check, ret;
2080         u64 refs;
2081         struct ulist *roots;
2082
2083         if (nrefs->bytenr[level] != bytenr) {
2084                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2085                                        level, 1, &refs, NULL);
2086                 if (ret < 0)
2087                         return ret;
2088
2089                 nrefs->bytenr[level] = bytenr;
2090                 nrefs->refs[level] = refs;
2091                 if (refs > 1) {
2092                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2093                                                    0, &roots);
2094                         if (ret)
2095                                 return -EIO;
2096
2097                         check = need_check(root, roots);
2098                         ulist_free(roots);
2099                         nrefs->need_check[level] = check;
2100                 } else {
2101                         nrefs->need_check[level] = 1;
2102                 }
2103         }
2104
2105         return 0;
2106 }
2107
2108 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2109                           struct walk_control *wc, int *level,
2110                           struct node_refs *nrefs)
2111 {
2112         enum btrfs_tree_block_status status;
2113         u64 bytenr;
2114         u64 ptr_gen;
2115         struct btrfs_fs_info *fs_info = root->fs_info;
2116         struct extent_buffer *next;
2117         struct extent_buffer *cur;
2118         int ret, err = 0;
2119         u64 refs;
2120
2121         WARN_ON(*level < 0);
2122         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2123
2124         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2125                 refs = nrefs->refs[*level];
2126                 ret = 0;
2127         } else {
2128                 ret = btrfs_lookup_extent_info(NULL, root,
2129                                        path->nodes[*level]->start,
2130                                        *level, 1, &refs, NULL);
2131                 if (ret < 0) {
2132                         err = ret;
2133                         goto out;
2134                 }
2135                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2136                 nrefs->refs[*level] = refs;
2137         }
2138
2139         if (refs > 1) {
2140                 ret = enter_shared_node(root, path->nodes[*level]->start,
2141                                         refs, wc, *level);
2142                 if (ret > 0) {
2143                         err = ret;
2144                         goto out;
2145                 }
2146         }
2147
2148         while (*level >= 0) {
2149                 WARN_ON(*level < 0);
2150                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2151                 cur = path->nodes[*level];
2152
2153                 if (btrfs_header_level(cur) != *level)
2154                         WARN_ON(1);
2155
2156                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2157                         break;
2158                 if (*level == 0) {
2159                         ret = process_one_leaf(root, cur, wc);
2160                         if (ret < 0)
2161                                 err = ret;
2162                         break;
2163                 }
2164                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2165                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2166
2167                 if (bytenr == nrefs->bytenr[*level - 1]) {
2168                         refs = nrefs->refs[*level - 1];
2169                 } else {
2170                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2171                                         *level - 1, 1, &refs, NULL);
2172                         if (ret < 0) {
2173                                 refs = 0;
2174                         } else {
2175                                 nrefs->bytenr[*level - 1] = bytenr;
2176                                 nrefs->refs[*level - 1] = refs;
2177                         }
2178                 }
2179
2180                 if (refs > 1) {
2181                         ret = enter_shared_node(root, bytenr, refs,
2182                                                 wc, *level - 1);
2183                         if (ret > 0) {
2184                                 path->slots[*level]++;
2185                                 continue;
2186                         }
2187                 }
2188
2189                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2190                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2191                         free_extent_buffer(next);
2192                         reada_walk_down(root, cur, path->slots[*level]);
2193                         next = read_tree_block(root->fs_info, bytenr,
2194                                         fs_info->nodesize, ptr_gen);
2195                         if (!extent_buffer_uptodate(next)) {
2196                                 struct btrfs_key node_key;
2197
2198                                 btrfs_node_key_to_cpu(path->nodes[*level],
2199                                                       &node_key,
2200                                                       path->slots[*level]);
2201                                 btrfs_add_corrupt_extent_record(root->fs_info,
2202                                                 &node_key,
2203                                                 path->nodes[*level]->start,
2204                                                 root->fs_info->nodesize,
2205                                                 *level);
2206                                 err = -EIO;
2207                                 goto out;
2208                         }
2209                 }
2210
2211                 ret = check_child_node(cur, path->slots[*level], next);
2212                 if (ret) {
2213                         free_extent_buffer(next);
2214                         err = ret;
2215                         goto out;
2216                 }
2217
2218                 if (btrfs_is_leaf(next))
2219                         status = btrfs_check_leaf(root, NULL, next);
2220                 else
2221                         status = btrfs_check_node(root, NULL, next);
2222                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2223                         free_extent_buffer(next);
2224                         err = -EIO;
2225                         goto out;
2226                 }
2227
2228                 *level = *level - 1;
2229                 free_extent_buffer(path->nodes[*level]);
2230                 path->nodes[*level] = next;
2231                 path->slots[*level] = 0;
2232         }
2233 out:
2234         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2235         return err;
2236 }
2237
2238 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2239                             unsigned int ext_ref);
2240
2241 /*
2242  * Returns >0  Found error, should continue
2243  * Returns <0  Fatal error, must exit the whole check
2244  * Returns 0   No errors found
2245  */
2246 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2247                              int *level, struct node_refs *nrefs, int ext_ref)
2248 {
2249         enum btrfs_tree_block_status status;
2250         u64 bytenr;
2251         u64 ptr_gen;
2252         struct btrfs_fs_info *fs_info = root->fs_info;
2253         struct extent_buffer *next;
2254         struct extent_buffer *cur;
2255         int ret;
2256
2257         WARN_ON(*level < 0);
2258         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2259
2260         ret = update_nodes_refs(root, path->nodes[*level]->start,
2261                                 nrefs, *level);
2262         if (ret < 0)
2263                 return ret;
2264
2265         while (*level >= 0) {
2266                 WARN_ON(*level < 0);
2267                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2268                 cur = path->nodes[*level];
2269
2270                 if (btrfs_header_level(cur) != *level)
2271                         WARN_ON(1);
2272
2273                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2274                         break;
2275                 /* Don't forgot to check leaf/node validation */
2276                 if (*level == 0) {
2277                         ret = btrfs_check_leaf(root, NULL, cur);
2278                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2279                                 ret = -EIO;
2280                                 break;
2281                         }
2282                         ret = process_one_leaf_v2(root, path, nrefs,
2283                                                   level, ext_ref);
2284                         break;
2285                 } else {
2286                         ret = btrfs_check_node(root, NULL, cur);
2287                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2288                                 ret = -EIO;
2289                                 break;
2290                         }
2291                 }
2292                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2293                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2294
2295                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2296                 if (ret)
2297                         break;
2298                 if (!nrefs->need_check[*level - 1]) {
2299                         path->slots[*level]++;
2300                         continue;
2301                 }
2302
2303                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2304                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2305                         free_extent_buffer(next);
2306                         reada_walk_down(root, cur, path->slots[*level]);
2307                         next = read_tree_block(fs_info, bytenr,
2308                                         fs_info->nodesize, ptr_gen);
2309                         if (!extent_buffer_uptodate(next)) {
2310                                 struct btrfs_key node_key;
2311
2312                                 btrfs_node_key_to_cpu(path->nodes[*level],
2313                                                       &node_key,
2314                                                       path->slots[*level]);
2315                                 btrfs_add_corrupt_extent_record(fs_info,
2316                                                 &node_key,
2317                                                 path->nodes[*level]->start,
2318                                                 fs_info->nodesize,
2319                                                 *level);
2320                                 ret = -EIO;
2321                                 break;
2322                         }
2323                 }
2324
2325                 ret = check_child_node(cur, path->slots[*level], next);
2326                 if (ret < 0) 
2327                         break;
2328
2329                 if (btrfs_is_leaf(next))
2330                         status = btrfs_check_leaf(root, NULL, next);
2331                 else
2332                         status = btrfs_check_node(root, NULL, next);
2333                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2334                         free_extent_buffer(next);
2335                         ret = -EIO;
2336                         break;
2337                 }
2338
2339                 *level = *level - 1;
2340                 free_extent_buffer(path->nodes[*level]);
2341                 path->nodes[*level] = next;
2342                 path->slots[*level] = 0;
2343         }
2344         return ret;
2345 }
2346
2347 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2348                         struct walk_control *wc, int *level)
2349 {
2350         int i;
2351         struct extent_buffer *leaf;
2352
2353         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2354                 leaf = path->nodes[i];
2355                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2356                         path->slots[i]++;
2357                         *level = i;
2358                         return 0;
2359                 } else {
2360                         free_extent_buffer(path->nodes[*level]);
2361                         path->nodes[*level] = NULL;
2362                         BUG_ON(*level > wc->active_node);
2363                         if (*level == wc->active_node)
2364                                 leave_shared_node(root, wc, *level);
2365                         *level = i + 1;
2366                 }
2367         }
2368         return 1;
2369 }
2370
2371 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2372                            int *level)
2373 {
2374         int i;
2375         struct extent_buffer *leaf;
2376
2377         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2378                 leaf = path->nodes[i];
2379                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2380                         path->slots[i]++;
2381                         *level = i;
2382                         return 0;
2383                 } else {
2384                         free_extent_buffer(path->nodes[*level]);
2385                         path->nodes[*level] = NULL;
2386                         *level = i + 1;
2387                 }
2388         }
2389         return 1;
2390 }
2391
2392 static int check_root_dir(struct inode_record *rec)
2393 {
2394         struct inode_backref *backref;
2395         int ret = -1;
2396
2397         if (!rec->found_inode_item || rec->errors)
2398                 goto out;
2399         if (rec->nlink != 1 || rec->found_link != 0)
2400                 goto out;
2401         if (list_empty(&rec->backrefs))
2402                 goto out;
2403         backref = to_inode_backref(rec->backrefs.next);
2404         if (!backref->found_inode_ref)
2405                 goto out;
2406         if (backref->index != 0 || backref->namelen != 2 ||
2407             memcmp(backref->name, "..", 2))
2408                 goto out;
2409         if (backref->found_dir_index || backref->found_dir_item)
2410                 goto out;
2411         ret = 0;
2412 out:
2413         return ret;
2414 }
2415
2416 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2417                               struct btrfs_root *root, struct btrfs_path *path,
2418                               struct inode_record *rec)
2419 {
2420         struct btrfs_inode_item *ei;
2421         struct btrfs_key key;
2422         int ret;
2423
2424         key.objectid = rec->ino;
2425         key.type = BTRFS_INODE_ITEM_KEY;
2426         key.offset = (u64)-1;
2427
2428         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2429         if (ret < 0)
2430                 goto out;
2431         if (ret) {
2432                 if (!path->slots[0]) {
2433                         ret = -ENOENT;
2434                         goto out;
2435                 }
2436                 path->slots[0]--;
2437                 ret = 0;
2438         }
2439         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2440         if (key.objectid != rec->ino) {
2441                 ret = -ENOENT;
2442                 goto out;
2443         }
2444
2445         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2446                             struct btrfs_inode_item);
2447         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2448         btrfs_mark_buffer_dirty(path->nodes[0]);
2449         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2450         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2451                root->root_key.objectid);
2452 out:
2453         btrfs_release_path(path);
2454         return ret;
2455 }
2456
2457 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2458                                     struct btrfs_root *root,
2459                                     struct btrfs_path *path,
2460                                     struct inode_record *rec)
2461 {
2462         int ret;
2463
2464         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2465         btrfs_release_path(path);
2466         if (!ret)
2467                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2468         return ret;
2469 }
2470
2471 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2472                                struct btrfs_root *root,
2473                                struct btrfs_path *path,
2474                                struct inode_record *rec)
2475 {
2476         struct btrfs_inode_item *ei;
2477         struct btrfs_key key;
2478         int ret = 0;
2479
2480         key.objectid = rec->ino;
2481         key.type = BTRFS_INODE_ITEM_KEY;
2482         key.offset = 0;
2483
2484         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2485         if (ret) {
2486                 if (ret > 0)
2487                         ret = -ENOENT;
2488                 goto out;
2489         }
2490
2491         /* Since ret == 0, no need to check anything */
2492         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2493                             struct btrfs_inode_item);
2494         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2495         btrfs_mark_buffer_dirty(path->nodes[0]);
2496         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2497         printf("reset nbytes for ino %llu root %llu\n",
2498                rec->ino, root->root_key.objectid);
2499 out:
2500         btrfs_release_path(path);
2501         return ret;
2502 }
2503
2504 static int add_missing_dir_index(struct btrfs_root *root,
2505                                  struct cache_tree *inode_cache,
2506                                  struct inode_record *rec,
2507                                  struct inode_backref *backref)
2508 {
2509         struct btrfs_path path;
2510         struct btrfs_trans_handle *trans;
2511         struct btrfs_dir_item *dir_item;
2512         struct extent_buffer *leaf;
2513         struct btrfs_key key;
2514         struct btrfs_disk_key disk_key;
2515         struct inode_record *dir_rec;
2516         unsigned long name_ptr;
2517         u32 data_size = sizeof(*dir_item) + backref->namelen;
2518         int ret;
2519
2520         trans = btrfs_start_transaction(root, 1);
2521         if (IS_ERR(trans))
2522                 return PTR_ERR(trans);
2523
2524         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2525                 (unsigned long long)rec->ino);
2526
2527         btrfs_init_path(&path);
2528         key.objectid = backref->dir;
2529         key.type = BTRFS_DIR_INDEX_KEY;
2530         key.offset = backref->index;
2531         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2532         BUG_ON(ret);
2533
2534         leaf = path.nodes[0];
2535         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2536
2537         disk_key.objectid = cpu_to_le64(rec->ino);
2538         disk_key.type = BTRFS_INODE_ITEM_KEY;
2539         disk_key.offset = 0;
2540
2541         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2542         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2543         btrfs_set_dir_data_len(leaf, dir_item, 0);
2544         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2545         name_ptr = (unsigned long)(dir_item + 1);
2546         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2547         btrfs_mark_buffer_dirty(leaf);
2548         btrfs_release_path(&path);
2549         btrfs_commit_transaction(trans, root);
2550
2551         backref->found_dir_index = 1;
2552         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2553         BUG_ON(IS_ERR(dir_rec));
2554         if (!dir_rec)
2555                 return 0;
2556         dir_rec->found_size += backref->namelen;
2557         if (dir_rec->found_size == dir_rec->isize &&
2558             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2559                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2560         if (dir_rec->found_size != dir_rec->isize)
2561                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2562
2563         return 0;
2564 }
2565
2566 static int delete_dir_index(struct btrfs_root *root,
2567                             struct inode_backref *backref)
2568 {
2569         struct btrfs_trans_handle *trans;
2570         struct btrfs_dir_item *di;
2571         struct btrfs_path path;
2572         int ret = 0;
2573
2574         trans = btrfs_start_transaction(root, 1);
2575         if (IS_ERR(trans))
2576                 return PTR_ERR(trans);
2577
2578         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2579                 (unsigned long long)backref->dir,
2580                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2581                 (unsigned long long)root->objectid);
2582
2583         btrfs_init_path(&path);
2584         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2585                                     backref->name, backref->namelen,
2586                                     backref->index, -1);
2587         if (IS_ERR(di)) {
2588                 ret = PTR_ERR(di);
2589                 btrfs_release_path(&path);
2590                 btrfs_commit_transaction(trans, root);
2591                 if (ret == -ENOENT)
2592                         return 0;
2593                 return ret;
2594         }
2595
2596         if (!di)
2597                 ret = btrfs_del_item(trans, root, &path);
2598         else
2599                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2600         BUG_ON(ret);
2601         btrfs_release_path(&path);
2602         btrfs_commit_transaction(trans, root);
2603         return ret;
2604 }
2605
2606 static int create_inode_item(struct btrfs_root *root,
2607                              struct inode_record *rec,
2608                              int root_dir)
2609 {
2610         struct btrfs_trans_handle *trans;
2611         struct btrfs_inode_item inode_item;
2612         time_t now = time(NULL);
2613         int ret;
2614
2615         trans = btrfs_start_transaction(root, 1);
2616         if (IS_ERR(trans)) {
2617                 ret = PTR_ERR(trans);
2618                 return ret;
2619         }
2620
2621         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2622                 "be incomplete, please check permissions and content after "
2623                 "the fsck completes.\n", (unsigned long long)root->objectid,
2624                 (unsigned long long)rec->ino);
2625
2626         memset(&inode_item, 0, sizeof(inode_item));
2627         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2628         if (root_dir)
2629                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2630         else
2631                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2632         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2633         if (rec->found_dir_item) {
2634                 if (rec->found_file_extent)
2635                         fprintf(stderr, "root %llu inode %llu has both a dir "
2636                                 "item and extents, unsure if it is a dir or a "
2637                                 "regular file so setting it as a directory\n",
2638                                 (unsigned long long)root->objectid,
2639                                 (unsigned long long)rec->ino);
2640                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2641                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2642         } else if (!rec->found_dir_item) {
2643                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2644                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2645         }
2646         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2647         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2648         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2649         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2650         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2651         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2652         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2653         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2654
2655         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2656         BUG_ON(ret);
2657         btrfs_commit_transaction(trans, root);
2658         return 0;
2659 }
2660
2661 static int repair_inode_backrefs(struct btrfs_root *root,
2662                                  struct inode_record *rec,
2663                                  struct cache_tree *inode_cache,
2664                                  int delete)
2665 {
2666         struct inode_backref *tmp, *backref;
2667         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2668         int ret = 0;
2669         int repaired = 0;
2670
2671         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2672                 if (!delete && rec->ino == root_dirid) {
2673                         if (!rec->found_inode_item) {
2674                                 ret = create_inode_item(root, rec, 1);
2675                                 if (ret)
2676                                         break;
2677                                 repaired++;
2678                         }
2679                 }
2680
2681                 /* Index 0 for root dir's are special, don't mess with it */
2682                 if (rec->ino == root_dirid && backref->index == 0)
2683                         continue;
2684
2685                 if (delete &&
2686                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2687                      (backref->found_dir_index && backref->found_inode_ref &&
2688                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2689                         ret = delete_dir_index(root, backref);
2690                         if (ret)
2691                                 break;
2692                         repaired++;
2693                         list_del(&backref->list);
2694                         free(backref);
2695                         continue;
2696                 }
2697
2698                 if (!delete && !backref->found_dir_index &&
2699                     backref->found_dir_item && backref->found_inode_ref) {
2700                         ret = add_missing_dir_index(root, inode_cache, rec,
2701                                                     backref);
2702                         if (ret)
2703                                 break;
2704                         repaired++;
2705                         if (backref->found_dir_item &&
2706                             backref->found_dir_index) {
2707                                 if (!backref->errors &&
2708                                     backref->found_inode_ref) {
2709                                         list_del(&backref->list);
2710                                         free(backref);
2711                                         continue;
2712                                 }
2713                         }
2714                 }
2715
2716                 if (!delete && (!backref->found_dir_index &&
2717                                 !backref->found_dir_item &&
2718                                 backref->found_inode_ref)) {
2719                         struct btrfs_trans_handle *trans;
2720                         struct btrfs_key location;
2721
2722                         ret = check_dir_conflict(root, backref->name,
2723                                                  backref->namelen,
2724                                                  backref->dir,
2725                                                  backref->index);
2726                         if (ret) {
2727                                 /*
2728                                  * let nlink fixing routine to handle it,
2729                                  * which can do it better.
2730                                  */
2731                                 ret = 0;
2732                                 break;
2733                         }
2734                         location.objectid = rec->ino;
2735                         location.type = BTRFS_INODE_ITEM_KEY;
2736                         location.offset = 0;
2737
2738                         trans = btrfs_start_transaction(root, 1);
2739                         if (IS_ERR(trans)) {
2740                                 ret = PTR_ERR(trans);
2741                                 break;
2742                         }
2743                         fprintf(stderr, "adding missing dir index/item pair "
2744                                 "for inode %llu\n",
2745                                 (unsigned long long)rec->ino);
2746                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2747                                                     backref->namelen,
2748                                                     backref->dir, &location,
2749                                                     imode_to_type(rec->imode),
2750                                                     backref->index);
2751                         BUG_ON(ret);
2752                         btrfs_commit_transaction(trans, root);
2753                         repaired++;
2754                 }
2755
2756                 if (!delete && (backref->found_inode_ref &&
2757                                 backref->found_dir_index &&
2758                                 backref->found_dir_item &&
2759                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2760                                 !rec->found_inode_item)) {
2761                         ret = create_inode_item(root, rec, 0);
2762                         if (ret)
2763                                 break;
2764                         repaired++;
2765                 }
2766
2767         }
2768         return ret ? ret : repaired;
2769 }
2770
2771 /*
2772  * To determine the file type for nlink/inode_item repair
2773  *
2774  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2775  * Return -ENOENT if file type is not found.
2776  */
2777 static int find_file_type(struct inode_record *rec, u8 *type)
2778 {
2779         struct inode_backref *backref;
2780
2781         /* For inode item recovered case */
2782         if (rec->found_inode_item) {
2783                 *type = imode_to_type(rec->imode);
2784                 return 0;
2785         }
2786
2787         list_for_each_entry(backref, &rec->backrefs, list) {
2788                 if (backref->found_dir_index || backref->found_dir_item) {
2789                         *type = backref->filetype;
2790                         return 0;
2791                 }
2792         }
2793         return -ENOENT;
2794 }
2795
2796 /*
2797  * To determine the file name for nlink repair
2798  *
2799  * Return 0 if file name is found, set name and namelen.
2800  * Return -ENOENT if file name is not found.
2801  */
2802 static int find_file_name(struct inode_record *rec,
2803                           char *name, int *namelen)
2804 {
2805         struct inode_backref *backref;
2806
2807         list_for_each_entry(backref, &rec->backrefs, list) {
2808                 if (backref->found_dir_index || backref->found_dir_item ||
2809                     backref->found_inode_ref) {
2810                         memcpy(name, backref->name, backref->namelen);
2811                         *namelen = backref->namelen;
2812                         return 0;
2813                 }
2814         }
2815         return -ENOENT;
2816 }
2817
2818 /* Reset the nlink of the inode to the correct one */
2819 static int reset_nlink(struct btrfs_trans_handle *trans,
2820                        struct btrfs_root *root,
2821                        struct btrfs_path *path,
2822                        struct inode_record *rec)
2823 {
2824         struct inode_backref *backref;
2825         struct inode_backref *tmp;
2826         struct btrfs_key key;
2827         struct btrfs_inode_item *inode_item;
2828         int ret = 0;
2829
2830         /* We don't believe this either, reset it and iterate backref */
2831         rec->found_link = 0;
2832
2833         /* Remove all backref including the valid ones */
2834         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2835                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2836                                    backref->index, backref->name,
2837                                    backref->namelen, 0);
2838                 if (ret < 0)
2839                         goto out;
2840
2841                 /* remove invalid backref, so it won't be added back */
2842                 if (!(backref->found_dir_index &&
2843                       backref->found_dir_item &&
2844                       backref->found_inode_ref)) {
2845                         list_del(&backref->list);
2846                         free(backref);
2847                 } else {
2848                         rec->found_link++;
2849                 }
2850         }
2851
2852         /* Set nlink to 0 */
2853         key.objectid = rec->ino;
2854         key.type = BTRFS_INODE_ITEM_KEY;
2855         key.offset = 0;
2856         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2857         if (ret < 0)
2858                 goto out;
2859         if (ret > 0) {
2860                 ret = -ENOENT;
2861                 goto out;
2862         }
2863         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2864                                     struct btrfs_inode_item);
2865         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2866         btrfs_mark_buffer_dirty(path->nodes[0]);
2867         btrfs_release_path(path);
2868
2869         /*
2870          * Add back valid inode_ref/dir_item/dir_index,
2871          * add_link() will handle the nlink inc, so new nlink must be correct
2872          */
2873         list_for_each_entry(backref, &rec->backrefs, list) {
2874                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2875                                      backref->name, backref->namelen,
2876                                      backref->filetype, &backref->index, 1);
2877                 if (ret < 0)
2878                         goto out;
2879         }
2880 out:
2881         btrfs_release_path(path);
2882         return ret;
2883 }
2884
2885 static int get_highest_inode(struct btrfs_trans_handle *trans,
2886                                 struct btrfs_root *root,
2887                                 struct btrfs_path *path,
2888                                 u64 *highest_ino)
2889 {
2890         struct btrfs_key key, found_key;
2891         int ret;
2892
2893         btrfs_init_path(path);
2894         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2895         key.offset = -1;
2896         key.type = BTRFS_INODE_ITEM_KEY;
2897         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2898         if (ret == 1) {
2899                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2900                                 path->slots[0] - 1);
2901                 *highest_ino = found_key.objectid;
2902                 ret = 0;
2903         }
2904         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2905                 ret = -EOVERFLOW;
2906         btrfs_release_path(path);
2907         return ret;
2908 }
2909
2910 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2911                                struct btrfs_root *root,
2912                                struct btrfs_path *path,
2913                                struct inode_record *rec)
2914 {
2915         char *dir_name = "lost+found";
2916         char namebuf[BTRFS_NAME_LEN] = {0};
2917         u64 lost_found_ino;
2918         u32 mode = 0700;
2919         u8 type = 0;
2920         int namelen = 0;
2921         int name_recovered = 0;
2922         int type_recovered = 0;
2923         int ret = 0;
2924
2925         /*
2926          * Get file name and type first before these invalid inode ref
2927          * are deleted by remove_all_invalid_backref()
2928          */
2929         name_recovered = !find_file_name(rec, namebuf, &namelen);
2930         type_recovered = !find_file_type(rec, &type);
2931
2932         if (!name_recovered) {
2933                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2934                        rec->ino, rec->ino);
2935                 namelen = count_digits(rec->ino);
2936                 sprintf(namebuf, "%llu", rec->ino);
2937                 name_recovered = 1;
2938         }
2939         if (!type_recovered) {
2940                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2941                        rec->ino);
2942                 type = BTRFS_FT_REG_FILE;
2943                 type_recovered = 1;
2944         }
2945
2946         ret = reset_nlink(trans, root, path, rec);
2947         if (ret < 0) {
2948                 fprintf(stderr,
2949                         "Failed to reset nlink for inode %llu: %s\n",
2950                         rec->ino, strerror(-ret));
2951                 goto out;
2952         }
2953
2954         if (rec->found_link == 0) {
2955                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2956                 if (ret < 0)
2957                         goto out;
2958                 lost_found_ino++;
2959                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2960                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2961                                   mode);
2962                 if (ret < 0) {
2963                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2964                                 dir_name, strerror(-ret));
2965                         goto out;
2966                 }
2967                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2968                                      namebuf, namelen, type, NULL, 1);
2969                 /*
2970                  * Add ".INO" suffix several times to handle case where
2971                  * "FILENAME.INO" is already taken by another file.
2972                  */
2973                 while (ret == -EEXIST) {
2974                         /*
2975                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2976                          */
2977                         if (namelen + count_digits(rec->ino) + 1 >
2978                             BTRFS_NAME_LEN) {
2979                                 ret = -EFBIG;
2980                                 goto out;
2981                         }
2982                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2983                                  ".%llu", rec->ino);
2984                         namelen += count_digits(rec->ino) + 1;
2985                         ret = btrfs_add_link(trans, root, rec->ino,
2986                                              lost_found_ino, namebuf,
2987                                              namelen, type, NULL, 1);
2988                 }
2989                 if (ret < 0) {
2990                         fprintf(stderr,
2991                                 "Failed to link the inode %llu to %s dir: %s\n",
2992                                 rec->ino, dir_name, strerror(-ret));
2993                         goto out;
2994                 }
2995                 /*
2996                  * Just increase the found_link, don't actually add the
2997                  * backref. This will make things easier and this inode
2998                  * record will be freed after the repair is done.
2999                  * So fsck will not report problem about this inode.
3000                  */
3001                 rec->found_link++;
3002                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3003                        namelen, namebuf, dir_name);
3004         }
3005         printf("Fixed the nlink of inode %llu\n", rec->ino);
3006 out:
3007         /*
3008          * Clear the flag anyway, or we will loop forever for the same inode
3009          * as it will not be removed from the bad inode list and the dead loop
3010          * happens.
3011          */
3012         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3013         btrfs_release_path(path);
3014         return ret;
3015 }
3016
3017 /*
3018  * Check if there is any normal(reg or prealloc) file extent for given
3019  * ino.
3020  * This is used to determine the file type when neither its dir_index/item or
3021  * inode_item exists.
3022  *
3023  * This will *NOT* report error, if any error happens, just consider it does
3024  * not have any normal file extent.
3025  */
3026 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3027 {
3028         struct btrfs_path path;
3029         struct btrfs_key key;
3030         struct btrfs_key found_key;
3031         struct btrfs_file_extent_item *fi;
3032         u8 type;
3033         int ret = 0;
3034
3035         btrfs_init_path(&path);
3036         key.objectid = ino;
3037         key.type = BTRFS_EXTENT_DATA_KEY;
3038         key.offset = 0;
3039
3040         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3041         if (ret < 0) {
3042                 ret = 0;
3043                 goto out;
3044         }
3045         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3046                 ret = btrfs_next_leaf(root, &path);
3047                 if (ret) {
3048                         ret = 0;
3049                         goto out;
3050                 }
3051         }
3052         while (1) {
3053                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3054                                       path.slots[0]);
3055                 if (found_key.objectid != ino ||
3056                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3057                         break;
3058                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3059                                     struct btrfs_file_extent_item);
3060                 type = btrfs_file_extent_type(path.nodes[0], fi);
3061                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3062                         ret = 1;
3063                         goto out;
3064                 }
3065         }
3066 out:
3067         btrfs_release_path(&path);
3068         return ret;
3069 }
3070
3071 static u32 btrfs_type_to_imode(u8 type)
3072 {
3073         static u32 imode_by_btrfs_type[] = {
3074                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3075                 [BTRFS_FT_DIR]          = S_IFDIR,
3076                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3077                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3078                 [BTRFS_FT_FIFO]         = S_IFIFO,
3079                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3080                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3081         };
3082
3083         return imode_by_btrfs_type[(type)];
3084 }
3085
3086 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3087                                 struct btrfs_root *root,
3088                                 struct btrfs_path *path,
3089                                 struct inode_record *rec)
3090 {
3091         u8 filetype;
3092         u32 mode = 0700;
3093         int type_recovered = 0;
3094         int ret = 0;
3095
3096         printf("Trying to rebuild inode:%llu\n", rec->ino);
3097
3098         type_recovered = !find_file_type(rec, &filetype);
3099
3100         /*
3101          * Try to determine inode type if type not found.
3102          *
3103          * For found regular file extent, it must be FILE.
3104          * For found dir_item/index, it must be DIR.
3105          *
3106          * For undetermined one, use FILE as fallback.
3107          *
3108          * TODO:
3109          * 1. If found backref(inode_index/item is already handled) to it,
3110          *    it must be DIR.
3111          *    Need new inode-inode ref structure to allow search for that.
3112          */
3113         if (!type_recovered) {
3114                 if (rec->found_file_extent &&
3115                     find_normal_file_extent(root, rec->ino)) {
3116                         type_recovered = 1;
3117                         filetype = BTRFS_FT_REG_FILE;
3118                 } else if (rec->found_dir_item) {
3119                         type_recovered = 1;
3120                         filetype = BTRFS_FT_DIR;
3121                 } else if (!list_empty(&rec->orphan_extents)) {
3122                         type_recovered = 1;
3123                         filetype = BTRFS_FT_REG_FILE;
3124                 } else{
3125                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3126                                rec->ino);
3127                         type_recovered = 1;
3128                         filetype = BTRFS_FT_REG_FILE;
3129                 }
3130         }
3131
3132         ret = btrfs_new_inode(trans, root, rec->ino,
3133                               mode | btrfs_type_to_imode(filetype));
3134         if (ret < 0)
3135                 goto out;
3136
3137         /*
3138          * Here inode rebuild is done, we only rebuild the inode item,
3139          * don't repair the nlink(like move to lost+found).
3140          * That is the job of nlink repair.
3141          *
3142          * We just fill the record and return
3143          */
3144         rec->found_dir_item = 1;
3145         rec->imode = mode | btrfs_type_to_imode(filetype);
3146         rec->nlink = 0;
3147         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3148         /* Ensure the inode_nlinks repair function will be called */
3149         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3150 out:
3151         return ret;
3152 }
3153
3154 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3155                                       struct btrfs_root *root,
3156                                       struct btrfs_path *path,
3157                                       struct inode_record *rec)
3158 {
3159         struct orphan_data_extent *orphan;
3160         struct orphan_data_extent *tmp;
3161         int ret = 0;
3162
3163         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3164                 /*
3165                  * Check for conflicting file extents
3166                  *
3167                  * Here we don't know whether the extents is compressed or not,
3168                  * so we can only assume it not compressed nor data offset,
3169                  * and use its disk_len as extent length.
3170                  */
3171                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3172                                        orphan->offset, orphan->disk_len, 0);
3173                 btrfs_release_path(path);
3174                 if (ret < 0)
3175                         goto out;
3176                 if (!ret) {
3177                         fprintf(stderr,
3178                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3179                                 orphan->disk_bytenr, orphan->disk_len);
3180                         ret = btrfs_free_extent(trans,
3181                                         root->fs_info->extent_root,
3182                                         orphan->disk_bytenr, orphan->disk_len,
3183                                         0, root->objectid, orphan->objectid,
3184                                         orphan->offset);
3185                         if (ret < 0)
3186                                 goto out;
3187                 }
3188                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3189                                 orphan->offset, orphan->disk_bytenr,
3190                                 orphan->disk_len, orphan->disk_len);
3191                 if (ret < 0)
3192                         goto out;
3193
3194                 /* Update file size info */
3195                 rec->found_size += orphan->disk_len;
3196                 if (rec->found_size == rec->nbytes)
3197                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3198
3199                 /* Update the file extent hole info too */
3200                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3201                                            orphan->disk_len);
3202                 if (ret < 0)
3203                         goto out;
3204                 if (RB_EMPTY_ROOT(&rec->holes))
3205                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3206
3207                 list_del(&orphan->list);
3208                 free(orphan);
3209         }
3210         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3211 out:
3212         return ret;
3213 }
3214
3215 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3216                                         struct btrfs_root *root,
3217                                         struct btrfs_path *path,
3218                                         struct inode_record *rec)
3219 {
3220         struct rb_node *node;
3221         struct file_extent_hole *hole;
3222         int found = 0;
3223         int ret = 0;
3224
3225         node = rb_first(&rec->holes);
3226
3227         while (node) {
3228                 found = 1;
3229                 hole = rb_entry(node, struct file_extent_hole, node);
3230                 ret = btrfs_punch_hole(trans, root, rec->ino,
3231                                        hole->start, hole->len);
3232                 if (ret < 0)
3233                         goto out;
3234                 ret = del_file_extent_hole(&rec->holes, hole->start,
3235                                            hole->len);
3236                 if (ret < 0)
3237                         goto out;
3238                 if (RB_EMPTY_ROOT(&rec->holes))
3239                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3240                 node = rb_first(&rec->holes);
3241         }
3242         /* special case for a file losing all its file extent */
3243         if (!found) {
3244                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3245                                        round_up(rec->isize,
3246                                                 root->fs_info->sectorsize));
3247                 if (ret < 0)
3248                         goto out;
3249         }
3250         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3251                rec->ino, root->objectid);
3252 out:
3253         return ret;
3254 }
3255
3256 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3257 {
3258         struct btrfs_trans_handle *trans;
3259         struct btrfs_path path;
3260         int ret = 0;
3261
3262         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3263                              I_ERR_NO_ORPHAN_ITEM |
3264                              I_ERR_LINK_COUNT_WRONG |
3265                              I_ERR_NO_INODE_ITEM |
3266                              I_ERR_FILE_EXTENT_ORPHAN |
3267                              I_ERR_FILE_EXTENT_DISCOUNT|
3268                              I_ERR_FILE_NBYTES_WRONG)))
3269                 return rec->errors;
3270
3271         /*
3272          * For nlink repair, it may create a dir and add link, so
3273          * 2 for parent(256)'s dir_index and dir_item
3274          * 2 for lost+found dir's inode_item and inode_ref
3275          * 1 for the new inode_ref of the file
3276          * 2 for lost+found dir's dir_index and dir_item for the file
3277          */
3278         trans = btrfs_start_transaction(root, 7);
3279         if (IS_ERR(trans))
3280                 return PTR_ERR(trans);
3281
3282         btrfs_init_path(&path);
3283         if (rec->errors & I_ERR_NO_INODE_ITEM)
3284                 ret = repair_inode_no_item(trans, root, &path, rec);
3285         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3286                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3287         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3288                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3289         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3290                 ret = repair_inode_isize(trans, root, &path, rec);
3291         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3292                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3293         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3294                 ret = repair_inode_nlinks(trans, root, &path, rec);
3295         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3296                 ret = repair_inode_nbytes(trans, root, &path, rec);
3297         btrfs_commit_transaction(trans, root);
3298         btrfs_release_path(&path);
3299         return ret;
3300 }
3301
3302 static int check_inode_recs(struct btrfs_root *root,
3303                             struct cache_tree *inode_cache)
3304 {
3305         struct cache_extent *cache;
3306         struct ptr_node *node;
3307         struct inode_record *rec;
3308         struct inode_backref *backref;
3309         int stage = 0;
3310         int ret = 0;
3311         int err = 0;
3312         u64 error = 0;
3313         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3314
3315         if (btrfs_root_refs(&root->root_item) == 0) {
3316                 if (!cache_tree_empty(inode_cache))
3317                         fprintf(stderr, "warning line %d\n", __LINE__);
3318                 return 0;
3319         }
3320
3321         /*
3322          * We need to repair backrefs first because we could change some of the
3323          * errors in the inode recs.
3324          *
3325          * We also need to go through and delete invalid backrefs first and then
3326          * add the correct ones second.  We do this because we may get EEXIST
3327          * when adding back the correct index because we hadn't yet deleted the
3328          * invalid index.
3329          *
3330          * For example, if we were missing a dir index then the directories
3331          * isize would be wrong, so if we fixed the isize to what we thought it
3332          * would be and then fixed the backref we'd still have a invalid fs, so
3333          * we need to add back the dir index and then check to see if the isize
3334          * is still wrong.
3335          */
3336         while (stage < 3) {
3337                 stage++;
3338                 if (stage == 3 && !err)
3339                         break;
3340
3341                 cache = search_cache_extent(inode_cache, 0);
3342                 while (repair && cache) {
3343                         node = container_of(cache, struct ptr_node, cache);
3344                         rec = node->data;
3345                         cache = next_cache_extent(cache);
3346
3347                         /* Need to free everything up and rescan */
3348                         if (stage == 3) {
3349                                 remove_cache_extent(inode_cache, &node->cache);
3350                                 free(node);
3351                                 free_inode_rec(rec);
3352                                 continue;
3353                         }
3354
3355                         if (list_empty(&rec->backrefs))
3356                                 continue;
3357
3358                         ret = repair_inode_backrefs(root, rec, inode_cache,
3359                                                     stage == 1);
3360                         if (ret < 0) {
3361                                 err = ret;
3362                                 stage = 2;
3363                                 break;
3364                         } if (ret > 0) {
3365                                 err = -EAGAIN;
3366                         }
3367                 }
3368         }
3369         if (err)
3370                 return err;
3371
3372         rec = get_inode_rec(inode_cache, root_dirid, 0);
3373         BUG_ON(IS_ERR(rec));
3374         if (rec) {
3375                 ret = check_root_dir(rec);
3376                 if (ret) {
3377                         fprintf(stderr, "root %llu root dir %llu error\n",
3378                                 (unsigned long long)root->root_key.objectid,
3379                                 (unsigned long long)root_dirid);
3380                         print_inode_error(root, rec);
3381                         error++;
3382                 }
3383         } else {
3384                 if (repair) {
3385                         struct btrfs_trans_handle *trans;
3386
3387                         trans = btrfs_start_transaction(root, 1);
3388                         if (IS_ERR(trans)) {
3389                                 err = PTR_ERR(trans);
3390                                 return err;
3391                         }
3392
3393                         fprintf(stderr,
3394                                 "root %llu missing its root dir, recreating\n",
3395                                 (unsigned long long)root->objectid);
3396
3397                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3398                         BUG_ON(ret);
3399
3400                         btrfs_commit_transaction(trans, root);
3401                         return -EAGAIN;
3402                 }
3403
3404                 fprintf(stderr, "root %llu root dir %llu not found\n",
3405                         (unsigned long long)root->root_key.objectid,
3406                         (unsigned long long)root_dirid);
3407         }
3408
3409         while (1) {
3410                 cache = search_cache_extent(inode_cache, 0);
3411                 if (!cache)
3412                         break;
3413                 node = container_of(cache, struct ptr_node, cache);
3414                 rec = node->data;
3415                 remove_cache_extent(inode_cache, &node->cache);
3416                 free(node);
3417                 if (rec->ino == root_dirid ||
3418                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3419                         free_inode_rec(rec);
3420                         continue;
3421                 }
3422
3423                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3424                         ret = check_orphan_item(root, rec->ino);
3425                         if (ret == 0)
3426                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3427                         if (can_free_inode_rec(rec)) {
3428                                 free_inode_rec(rec);
3429                                 continue;
3430                         }
3431                 }
3432
3433                 if (!rec->found_inode_item)
3434                         rec->errors |= I_ERR_NO_INODE_ITEM;
3435                 if (rec->found_link != rec->nlink)
3436                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3437                 if (repair) {
3438                         ret = try_repair_inode(root, rec);
3439                         if (ret == 0 && can_free_inode_rec(rec)) {
3440                                 free_inode_rec(rec);
3441                                 continue;
3442                         }
3443                         ret = 0;
3444                 }
3445
3446                 if (!(repair && ret == 0))
3447                         error++;
3448                 print_inode_error(root, rec);
3449                 list_for_each_entry(backref, &rec->backrefs, list) {
3450                         if (!backref->found_dir_item)
3451                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3452                         if (!backref->found_dir_index)
3453                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3454                         if (!backref->found_inode_ref)
3455                                 backref->errors |= REF_ERR_NO_INODE_REF;
3456                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3457                                 " namelen %u name %s filetype %d errors %x",
3458                                 (unsigned long long)backref->dir,
3459                                 (unsigned long long)backref->index,
3460                                 backref->namelen, backref->name,
3461                                 backref->filetype, backref->errors);
3462                         print_ref_error(backref->errors);
3463                 }
3464                 free_inode_rec(rec);
3465         }
3466         return (error > 0) ? -1 : 0;
3467 }
3468
3469 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3470                                         u64 objectid)
3471 {
3472         struct cache_extent *cache;
3473         struct root_record *rec = NULL;
3474         int ret;
3475
3476         cache = lookup_cache_extent(root_cache, objectid, 1);
3477         if (cache) {
3478                 rec = container_of(cache, struct root_record, cache);
3479         } else {
3480                 rec = calloc(1, sizeof(*rec));
3481                 if (!rec)
3482                         return ERR_PTR(-ENOMEM);
3483                 rec->objectid = objectid;
3484                 INIT_LIST_HEAD(&rec->backrefs);
3485                 rec->cache.start = objectid;
3486                 rec->cache.size = 1;
3487
3488                 ret = insert_cache_extent(root_cache, &rec->cache);
3489                 if (ret)
3490                         return ERR_PTR(-EEXIST);
3491         }
3492         return rec;
3493 }
3494
3495 static struct root_backref *get_root_backref(struct root_record *rec,
3496                                              u64 ref_root, u64 dir, u64 index,
3497                                              const char *name, int namelen)
3498 {
3499         struct root_backref *backref;
3500
3501         list_for_each_entry(backref, &rec->backrefs, list) {
3502                 if (backref->ref_root != ref_root || backref->dir != dir ||
3503                     backref->namelen != namelen)
3504                         continue;
3505                 if (memcmp(name, backref->name, namelen))
3506                         continue;
3507                 return backref;
3508         }
3509
3510         backref = calloc(1, sizeof(*backref) + namelen + 1);
3511         if (!backref)
3512                 return NULL;
3513         backref->ref_root = ref_root;
3514         backref->dir = dir;
3515         backref->index = index;
3516         backref->namelen = namelen;
3517         memcpy(backref->name, name, namelen);
3518         backref->name[namelen] = '\0';
3519         list_add_tail(&backref->list, &rec->backrefs);
3520         return backref;
3521 }
3522
3523 static void free_root_record(struct cache_extent *cache)
3524 {
3525         struct root_record *rec;
3526         struct root_backref *backref;
3527
3528         rec = container_of(cache, struct root_record, cache);
3529         while (!list_empty(&rec->backrefs)) {
3530                 backref = to_root_backref(rec->backrefs.next);
3531                 list_del(&backref->list);
3532                 free(backref);
3533         }
3534
3535         free(rec);
3536 }
3537
3538 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3539
3540 static int add_root_backref(struct cache_tree *root_cache,
3541                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3542                             const char *name, int namelen,
3543                             int item_type, int errors)
3544 {
3545         struct root_record *rec;
3546         struct root_backref *backref;
3547
3548         rec = get_root_rec(root_cache, root_id);
3549         BUG_ON(IS_ERR(rec));
3550         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3551         BUG_ON(!backref);
3552
3553         backref->errors |= errors;
3554
3555         if (item_type != BTRFS_DIR_ITEM_KEY) {
3556                 if (backref->found_dir_index || backref->found_back_ref ||
3557                     backref->found_forward_ref) {
3558                         if (backref->index != index)
3559                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3560                 } else {
3561                         backref->index = index;
3562                 }
3563         }
3564
3565         if (item_type == BTRFS_DIR_ITEM_KEY) {
3566                 if (backref->found_forward_ref)
3567                         rec->found_ref++;
3568                 backref->found_dir_item = 1;
3569         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3570                 backref->found_dir_index = 1;
3571         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3572                 if (backref->found_forward_ref)
3573                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3574                 else if (backref->found_dir_item)
3575                         rec->found_ref++;
3576                 backref->found_forward_ref = 1;
3577         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3578                 if (backref->found_back_ref)
3579                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3580                 backref->found_back_ref = 1;
3581         } else {
3582                 BUG_ON(1);
3583         }
3584
3585         if (backref->found_forward_ref && backref->found_dir_item)
3586                 backref->reachable = 1;
3587         return 0;
3588 }
3589
3590 static int merge_root_recs(struct btrfs_root *root,
3591                            struct cache_tree *src_cache,
3592                            struct cache_tree *dst_cache)
3593 {
3594         struct cache_extent *cache;
3595         struct ptr_node *node;
3596         struct inode_record *rec;
3597         struct inode_backref *backref;
3598         int ret = 0;
3599
3600         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3601                 free_inode_recs_tree(src_cache);
3602                 return 0;
3603         }
3604
3605         while (1) {
3606                 cache = search_cache_extent(src_cache, 0);
3607                 if (!cache)
3608                         break;
3609                 node = container_of(cache, struct ptr_node, cache);
3610                 rec = node->data;
3611                 remove_cache_extent(src_cache, &node->cache);
3612                 free(node);
3613
3614                 ret = is_child_root(root, root->objectid, rec->ino);
3615                 if (ret < 0)
3616                         break;
3617                 else if (ret == 0)
3618                         goto skip;
3619
3620                 list_for_each_entry(backref, &rec->backrefs, list) {
3621                         BUG_ON(backref->found_inode_ref);
3622                         if (backref->found_dir_item)
3623                                 add_root_backref(dst_cache, rec->ino,
3624                                         root->root_key.objectid, backref->dir,
3625                                         backref->index, backref->name,
3626                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3627                                         backref->errors);
3628                         if (backref->found_dir_index)
3629                                 add_root_backref(dst_cache, rec->ino,
3630                                         root->root_key.objectid, backref->dir,
3631                                         backref->index, backref->name,
3632                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3633                                         backref->errors);
3634                 }
3635 skip:
3636                 free_inode_rec(rec);
3637         }
3638         if (ret < 0)
3639                 return ret;
3640         return 0;
3641 }
3642
3643 static int check_root_refs(struct btrfs_root *root,
3644                            struct cache_tree *root_cache)
3645 {
3646         struct root_record *rec;
3647         struct root_record *ref_root;
3648         struct root_backref *backref;
3649         struct cache_extent *cache;
3650         int loop = 1;
3651         int ret;
3652         int error;
3653         int errors = 0;
3654
3655         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3656         BUG_ON(IS_ERR(rec));
3657         rec->found_ref = 1;
3658
3659         /* fixme: this can not detect circular references */
3660         while (loop) {
3661                 loop = 0;
3662                 cache = search_cache_extent(root_cache, 0);
3663                 while (1) {
3664                         if (!cache)
3665                                 break;
3666                         rec = container_of(cache, struct root_record, cache);
3667                         cache = next_cache_extent(cache);
3668
3669                         if (rec->found_ref == 0)
3670                                 continue;
3671
3672                         list_for_each_entry(backref, &rec->backrefs, list) {
3673                                 if (!backref->reachable)
3674                                         continue;
3675
3676                                 ref_root = get_root_rec(root_cache,
3677                                                         backref->ref_root);
3678                                 BUG_ON(IS_ERR(ref_root));
3679                                 if (ref_root->found_ref > 0)
3680                                         continue;
3681
3682                                 backref->reachable = 0;
3683                                 rec->found_ref--;
3684                                 if (rec->found_ref == 0)
3685                                         loop = 1;
3686                         }
3687                 }
3688         }
3689
3690         cache = search_cache_extent(root_cache, 0);
3691         while (1) {
3692                 if (!cache)
3693                         break;
3694                 rec = container_of(cache, struct root_record, cache);
3695                 cache = next_cache_extent(cache);
3696
3697                 if (rec->found_ref == 0 &&
3698                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3699                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3700                         ret = check_orphan_item(root->fs_info->tree_root,
3701                                                 rec->objectid);
3702                         if (ret == 0)
3703                                 continue;
3704
3705                         /*
3706                          * If we don't have a root item then we likely just have
3707                          * a dir item in a snapshot for this root but no actual
3708                          * ref key or anything so it's meaningless.
3709                          */
3710                         if (!rec->found_root_item)
3711                                 continue;
3712                         errors++;
3713                         fprintf(stderr, "fs tree %llu not referenced\n",
3714                                 (unsigned long long)rec->objectid);
3715                 }
3716
3717                 error = 0;
3718                 if (rec->found_ref > 0 && !rec->found_root_item)
3719                         error = 1;
3720                 list_for_each_entry(backref, &rec->backrefs, list) {
3721                         if (!backref->found_dir_item)
3722                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3723                         if (!backref->found_dir_index)
3724                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3725                         if (!backref->found_back_ref)
3726                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3727                         if (!backref->found_forward_ref)
3728                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3729                         if (backref->reachable && backref->errors)
3730                                 error = 1;
3731                 }
3732                 if (!error)
3733                         continue;
3734
3735                 errors++;
3736                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3737                         (unsigned long long)rec->objectid, rec->found_ref,
3738                          rec->found_root_item ? "" : "not found");
3739
3740                 list_for_each_entry(backref, &rec->backrefs, list) {
3741                         if (!backref->reachable)
3742                                 continue;
3743                         if (!backref->errors && rec->found_root_item)
3744                                 continue;
3745                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3746                                 " index %llu namelen %u name %s errors %x\n",
3747                                 (unsigned long long)backref->ref_root,
3748                                 (unsigned long long)backref->dir,
3749                                 (unsigned long long)backref->index,
3750                                 backref->namelen, backref->name,
3751                                 backref->errors);
3752                         print_ref_error(backref->errors);
3753                 }
3754         }
3755         return errors > 0 ? 1 : 0;
3756 }
3757
3758 static int process_root_ref(struct extent_buffer *eb, int slot,
3759                             struct btrfs_key *key,
3760                             struct cache_tree *root_cache)
3761 {
3762         u64 dirid;
3763         u64 index;
3764         u32 len;
3765         u32 name_len;
3766         struct btrfs_root_ref *ref;
3767         char namebuf[BTRFS_NAME_LEN];
3768         int error;
3769
3770         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3771
3772         dirid = btrfs_root_ref_dirid(eb, ref);
3773         index = btrfs_root_ref_sequence(eb, ref);
3774         name_len = btrfs_root_ref_name_len(eb, ref);
3775
3776         if (name_len <= BTRFS_NAME_LEN) {
3777                 len = name_len;
3778                 error = 0;
3779         } else {
3780                 len = BTRFS_NAME_LEN;
3781                 error = REF_ERR_NAME_TOO_LONG;
3782         }
3783         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3784
3785         if (key->type == BTRFS_ROOT_REF_KEY) {
3786                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3787                                  index, namebuf, len, key->type, error);
3788         } else {
3789                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3790                                  index, namebuf, len, key->type, error);
3791         }
3792         return 0;
3793 }
3794
3795 static void free_corrupt_block(struct cache_extent *cache)
3796 {
3797         struct btrfs_corrupt_block *corrupt;
3798
3799         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3800         free(corrupt);
3801 }
3802
3803 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3804
3805 /*
3806  * Repair the btree of the given root.
3807  *
3808  * The fix is to remove the node key in corrupt_blocks cache_tree.
3809  * and rebalance the tree.
3810  * After the fix, the btree should be writeable.
3811  */
3812 static int repair_btree(struct btrfs_root *root,
3813                         struct cache_tree *corrupt_blocks)
3814 {
3815         struct btrfs_trans_handle *trans;
3816         struct btrfs_path path;
3817         struct btrfs_corrupt_block *corrupt;
3818         struct cache_extent *cache;
3819         struct btrfs_key key;
3820         u64 offset;
3821         int level;
3822         int ret = 0;
3823
3824         if (cache_tree_empty(corrupt_blocks))
3825                 return 0;
3826
3827         trans = btrfs_start_transaction(root, 1);
3828         if (IS_ERR(trans)) {
3829                 ret = PTR_ERR(trans);
3830                 fprintf(stderr, "Error starting transaction: %s\n",
3831                         strerror(-ret));
3832                 return ret;
3833         }
3834         btrfs_init_path(&path);
3835         cache = first_cache_extent(corrupt_blocks);
3836         while (cache) {
3837                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3838                                        cache);
3839                 level = corrupt->level;
3840                 path.lowest_level = level;
3841                 key.objectid = corrupt->key.objectid;
3842                 key.type = corrupt->key.type;
3843                 key.offset = corrupt->key.offset;
3844
3845                 /*
3846                  * Here we don't want to do any tree balance, since it may
3847                  * cause a balance with corrupted brother leaf/node,
3848                  * so ins_len set to 0 here.
3849                  * Balance will be done after all corrupt node/leaf is deleted.
3850                  */
3851                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3852                 if (ret < 0)
3853                         goto out;
3854                 offset = btrfs_node_blockptr(path.nodes[level],
3855                                              path.slots[level]);
3856
3857                 /* Remove the ptr */
3858                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3859                 if (ret < 0)
3860                         goto out;
3861                 /*
3862                  * Remove the corresponding extent
3863                  * return value is not concerned.
3864                  */
3865                 btrfs_release_path(&path);
3866                 ret = btrfs_free_extent(trans, root, offset,
3867                                 root->fs_info->nodesize, 0,
3868                                 root->root_key.objectid, level - 1, 0);
3869                 cache = next_cache_extent(cache);
3870         }
3871
3872         /* Balance the btree using btrfs_search_slot() */
3873         cache = first_cache_extent(corrupt_blocks);
3874         while (cache) {
3875                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3876                                        cache);
3877                 memcpy(&key, &corrupt->key, sizeof(key));
3878                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3879                 if (ret < 0)
3880                         goto out;
3881                 /* return will always >0 since it won't find the item */
3882                 ret = 0;
3883                 btrfs_release_path(&path);
3884                 cache = next_cache_extent(cache);
3885         }
3886 out:
3887         btrfs_commit_transaction(trans, root);
3888         btrfs_release_path(&path);
3889         return ret;
3890 }
3891
3892 static int check_fs_root(struct btrfs_root *root,
3893                          struct cache_tree *root_cache,
3894                          struct walk_control *wc)
3895 {
3896         int ret = 0;
3897         int err = 0;
3898         int wret;
3899         int level;
3900         struct btrfs_path path;
3901         struct shared_node root_node;
3902         struct root_record *rec;
3903         struct btrfs_root_item *root_item = &root->root_item;
3904         struct cache_tree corrupt_blocks;
3905         struct orphan_data_extent *orphan;
3906         struct orphan_data_extent *tmp;
3907         enum btrfs_tree_block_status status;
3908         struct node_refs nrefs;
3909
3910         /*
3911          * Reuse the corrupt_block cache tree to record corrupted tree block
3912          *
3913          * Unlike the usage in extent tree check, here we do it in a per
3914          * fs/subvol tree base.
3915          */
3916         cache_tree_init(&corrupt_blocks);
3917         root->fs_info->corrupt_blocks = &corrupt_blocks;
3918
3919         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3920                 rec = get_root_rec(root_cache, root->root_key.objectid);
3921                 BUG_ON(IS_ERR(rec));
3922                 if (btrfs_root_refs(root_item) > 0)
3923                         rec->found_root_item = 1;
3924         }
3925
3926         btrfs_init_path(&path);
3927         memset(&root_node, 0, sizeof(root_node));
3928         cache_tree_init(&root_node.root_cache);
3929         cache_tree_init(&root_node.inode_cache);
3930         memset(&nrefs, 0, sizeof(nrefs));
3931
3932         /* Move the orphan extent record to corresponding inode_record */
3933         list_for_each_entry_safe(orphan, tmp,
3934                                  &root->orphan_data_extents, list) {
3935                 struct inode_record *inode;
3936
3937                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3938                                       1);
3939                 BUG_ON(IS_ERR(inode));
3940                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3941                 list_move(&orphan->list, &inode->orphan_extents);
3942         }
3943
3944         level = btrfs_header_level(root->node);
3945         memset(wc->nodes, 0, sizeof(wc->nodes));
3946         wc->nodes[level] = &root_node;
3947         wc->active_node = level;
3948         wc->root_level = level;
3949
3950         /* We may not have checked the root block, lets do that now */
3951         if (btrfs_is_leaf(root->node))
3952                 status = btrfs_check_leaf(root, NULL, root->node);
3953         else
3954                 status = btrfs_check_node(root, NULL, root->node);
3955         if (status != BTRFS_TREE_BLOCK_CLEAN)
3956                 return -EIO;
3957
3958         if (btrfs_root_refs(root_item) > 0 ||
3959             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3960                 path.nodes[level] = root->node;
3961                 extent_buffer_get(root->node);
3962                 path.slots[level] = 0;
3963         } else {
3964                 struct btrfs_key key;
3965                 struct btrfs_disk_key found_key;
3966
3967                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3968                 level = root_item->drop_level;
3969                 path.lowest_level = level;
3970                 if (level > btrfs_header_level(root->node) ||
3971                     level >= BTRFS_MAX_LEVEL) {
3972                         error("ignoring invalid drop level: %u", level);
3973                         goto skip_walking;
3974                 }
3975                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3976                 if (wret < 0)
3977                         goto skip_walking;
3978                 btrfs_node_key(path.nodes[level], &found_key,
3979                                 path.slots[level]);
3980                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3981                                         sizeof(found_key)));
3982         }
3983
3984         while (1) {
3985                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3986                 if (wret < 0)
3987                         ret = wret;
3988                 if (wret != 0)
3989                         break;
3990
3991                 wret = walk_up_tree(root, &path, wc, &level);
3992                 if (wret < 0)
3993                         ret = wret;
3994                 if (wret != 0)
3995                         break;
3996         }
3997 skip_walking:
3998         btrfs_release_path(&path);
3999
4000         if (!cache_tree_empty(&corrupt_blocks)) {
4001                 struct cache_extent *cache;
4002                 struct btrfs_corrupt_block *corrupt;
4003
4004                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4005                        root->root_key.objectid);
4006                 cache = first_cache_extent(&corrupt_blocks);
4007                 while (cache) {
4008                         corrupt = container_of(cache,
4009                                                struct btrfs_corrupt_block,
4010                                                cache);
4011                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4012                                cache->start, corrupt->level,
4013                                corrupt->key.objectid, corrupt->key.type,
4014                                corrupt->key.offset);
4015                         cache = next_cache_extent(cache);
4016                 }
4017                 if (repair) {
4018                         printf("Try to repair the btree for root %llu\n",
4019                                root->root_key.objectid);
4020                         ret = repair_btree(root, &corrupt_blocks);
4021                         if (ret < 0)
4022                                 fprintf(stderr, "Failed to repair btree: %s\n",
4023                                         strerror(-ret));
4024                         if (!ret)
4025                                 printf("Btree for root %llu is fixed\n",
4026                                        root->root_key.objectid);
4027                 }
4028         }
4029
4030         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4031         if (err < 0)
4032                 ret = err;
4033
4034         if (root_node.current) {
4035                 root_node.current->checked = 1;
4036                 maybe_free_inode_rec(&root_node.inode_cache,
4037                                 root_node.current);
4038         }
4039
4040         err = check_inode_recs(root, &root_node.inode_cache);
4041         if (!ret)
4042                 ret = err;
4043
4044         free_corrupt_blocks_tree(&corrupt_blocks);
4045         root->fs_info->corrupt_blocks = NULL;
4046         free_orphan_data_extents(&root->orphan_data_extents);
4047         return ret;
4048 }
4049
4050 static int fs_root_objectid(u64 objectid)
4051 {
4052         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4053             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4054                 return 1;
4055         return is_fstree(objectid);
4056 }
4057
4058 static int check_fs_roots(struct btrfs_root *root,
4059                           struct cache_tree *root_cache)
4060 {
4061         struct btrfs_path path;
4062         struct btrfs_key key;
4063         struct walk_control wc;
4064         struct extent_buffer *leaf, *tree_node;
4065         struct btrfs_root *tmp_root;
4066         struct btrfs_root *tree_root = root->fs_info->tree_root;
4067         int ret;
4068         int err = 0;
4069
4070         if (ctx.progress_enabled) {
4071                 ctx.tp = TASK_FS_ROOTS;
4072                 task_start(ctx.info);
4073         }
4074
4075         /*
4076          * Just in case we made any changes to the extent tree that weren't
4077          * reflected into the free space cache yet.
4078          */
4079         if (repair)
4080                 reset_cached_block_groups(root->fs_info);
4081         memset(&wc, 0, sizeof(wc));
4082         cache_tree_init(&wc.shared);
4083         btrfs_init_path(&path);
4084
4085 again:
4086         key.offset = 0;
4087         key.objectid = 0;
4088         key.type = BTRFS_ROOT_ITEM_KEY;
4089         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4090         if (ret < 0) {
4091                 err = 1;
4092                 goto out;
4093         }
4094         tree_node = tree_root->node;
4095         while (1) {
4096                 if (tree_node != tree_root->node) {
4097                         free_root_recs_tree(root_cache);
4098                         btrfs_release_path(&path);
4099                         goto again;
4100                 }
4101                 leaf = path.nodes[0];
4102                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4103                         ret = btrfs_next_leaf(tree_root, &path);
4104                         if (ret) {
4105                                 if (ret < 0)
4106                                         err = 1;
4107                                 break;
4108                         }
4109                         leaf = path.nodes[0];
4110                 }
4111                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4112                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4113                     fs_root_objectid(key.objectid)) {
4114                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4115                                 tmp_root = btrfs_read_fs_root_no_cache(
4116                                                 root->fs_info, &key);
4117                         } else {
4118                                 key.offset = (u64)-1;
4119                                 tmp_root = btrfs_read_fs_root(
4120                                                 root->fs_info, &key);
4121                         }
4122                         if (IS_ERR(tmp_root)) {
4123                                 err = 1;
4124                                 goto next;
4125                         }
4126                         ret = check_fs_root(tmp_root, root_cache, &wc);
4127                         if (ret == -EAGAIN) {
4128                                 free_root_recs_tree(root_cache);
4129                                 btrfs_release_path(&path);
4130                                 goto again;
4131                         }
4132                         if (ret)
4133                                 err = 1;
4134                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4135                                 btrfs_free_fs_root(tmp_root);
4136                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4137                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4138                         process_root_ref(leaf, path.slots[0], &key,
4139                                          root_cache);
4140                 }
4141 next:
4142                 path.slots[0]++;
4143         }
4144 out:
4145         btrfs_release_path(&path);
4146         if (err)
4147                 free_extent_cache_tree(&wc.shared);
4148         if (!cache_tree_empty(&wc.shared))
4149                 fprintf(stderr, "warning line %d\n", __LINE__);
4150
4151         task_stop(ctx.info);
4152
4153         return err;
4154 }
4155
4156 /*
4157  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4158  * INODE_REF/INODE_EXTREF match.
4159  *
4160  * @root:       the root of the fs/file tree
4161  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4162  * @key:        the key of the DIR_ITEM/DIR_INDEX
4163  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4164  *              distinguish root_dir between normal dir/file
4165  * @name:       the name in the INODE_REF/INODE_EXTREF
4166  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4167  * @mode:       the st_mode of INODE_ITEM
4168  *
4169  * Return 0 if no error occurred.
4170  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4171  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4172  * dir/file.
4173  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4174  * not match for normal dir/file.
4175  */
4176 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4177                          struct btrfs_key *key, u64 index, char *name,
4178                          u32 namelen, u32 mode)
4179 {
4180         struct btrfs_path path;
4181         struct extent_buffer *node;
4182         struct btrfs_dir_item *di;
4183         struct btrfs_key location;
4184         char namebuf[BTRFS_NAME_LEN] = {0};
4185         u32 total;
4186         u32 cur = 0;
4187         u32 len;
4188         u32 name_len;
4189         u32 data_len;
4190         u8 filetype;
4191         int slot;
4192         int ret;
4193
4194         btrfs_init_path(&path);
4195         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4196         if (ret < 0) {
4197                 ret = DIR_ITEM_MISSING;
4198                 goto out;
4199         }
4200
4201         /* Process root dir and goto out*/
4202         if (index == 0) {
4203                 if (ret == 0) {
4204                         ret = ROOT_DIR_ERROR;
4205                         error(
4206                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4207                                 root->objectid,
4208                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4209                                         "REF" : "EXTREF",
4210                                 ref_key->objectid, ref_key->offset,
4211                                 key->type == BTRFS_DIR_ITEM_KEY ?
4212                                         "DIR_ITEM" : "DIR_INDEX");
4213                 } else {
4214                         ret = 0;
4215                 }
4216
4217                 goto out;
4218         }
4219
4220         /* Process normal file/dir */
4221         if (ret > 0) {
4222                 ret = DIR_ITEM_MISSING;
4223                 error(
4224                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4225                         root->objectid,
4226                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4227                         ref_key->objectid, ref_key->offset,
4228                         key->type == BTRFS_DIR_ITEM_KEY ?
4229                                 "DIR_ITEM" : "DIR_INDEX",
4230                         key->objectid, key->offset, namelen, name,
4231                         imode_to_type(mode));
4232                 goto out;
4233         }
4234
4235         /* Check whether inode_id/filetype/name match */
4236         node = path.nodes[0];
4237         slot = path.slots[0];
4238         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4239         total = btrfs_item_size_nr(node, slot);
4240         while (cur < total) {
4241                 ret = DIR_ITEM_MISMATCH;
4242                 name_len = btrfs_dir_name_len(node, di);
4243                 data_len = btrfs_dir_data_len(node, di);
4244
4245                 btrfs_dir_item_key_to_cpu(node, di, &location);
4246                 if (location.objectid != ref_key->objectid ||
4247                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4248                     location.offset != 0)
4249                         goto next;
4250
4251                 filetype = btrfs_dir_type(node, di);
4252                 if (imode_to_type(mode) != filetype)
4253                         goto next;
4254
4255                 if (cur + sizeof(*di) + name_len > total ||
4256                     name_len > BTRFS_NAME_LEN) {
4257                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4258                                 root->objectid,
4259                                 key->type == BTRFS_DIR_ITEM_KEY ?
4260                                 "DIR_ITEM" : "DIR_INDEX",
4261                                 key->objectid, key->offset, name_len);
4262
4263                         if (cur + sizeof(*di) > total)
4264                                 break;
4265                         len = min_t(u32, total - cur - sizeof(*di),
4266                                     BTRFS_NAME_LEN);
4267                 } else {
4268                         len = name_len;
4269                 }
4270
4271                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4272                 if (len != namelen || strncmp(namebuf, name, len))
4273                         goto next;
4274
4275                 ret = 0;
4276                 goto out;
4277 next:
4278                 len = sizeof(*di) + name_len + data_len;
4279                 di = (struct btrfs_dir_item *)((char *)di + len);
4280                 cur += len;
4281         }
4282         if (ret == DIR_ITEM_MISMATCH)
4283                 error(
4284                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4285                         root->objectid,
4286                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4287                         ref_key->objectid, ref_key->offset,
4288                         key->type == BTRFS_DIR_ITEM_KEY ?
4289                                 "DIR_ITEM" : "DIR_INDEX",
4290                         key->objectid, key->offset, namelen, name,
4291                         imode_to_type(mode));
4292 out:
4293         btrfs_release_path(&path);
4294         return ret;
4295 }
4296
4297 /*
4298  * Traverse the given INODE_REF and call find_dir_item() to find related
4299  * DIR_ITEM/DIR_INDEX.
4300  *
4301  * @root:       the root of the fs/file tree
4302  * @ref_key:    the key of the INODE_REF
4303  * @refs:       the count of INODE_REF
4304  * @mode:       the st_mode of INODE_ITEM
4305  *
4306  * Return 0 if no error occurred.
4307  */
4308 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4309                            struct extent_buffer *node, int slot, u64 *refs,
4310                            int mode)
4311 {
4312         struct btrfs_key key;
4313         struct btrfs_inode_ref *ref;
4314         char namebuf[BTRFS_NAME_LEN] = {0};
4315         u32 total;
4316         u32 cur = 0;
4317         u32 len;
4318         u32 name_len;
4319         u64 index;
4320         int ret, err = 0;
4321
4322         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4323         total = btrfs_item_size_nr(node, slot);
4324
4325 next:
4326         /* Update inode ref count */
4327         (*refs)++;
4328
4329         index = btrfs_inode_ref_index(node, ref);
4330         name_len = btrfs_inode_ref_name_len(node, ref);
4331         if (cur + sizeof(*ref) + name_len > total ||
4332             name_len > BTRFS_NAME_LEN) {
4333                 warning("root %llu INODE_REF[%llu %llu] name too long",
4334                         root->objectid, ref_key->objectid, ref_key->offset);
4335
4336                 if (total < cur + sizeof(*ref))
4337                         goto out;
4338                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4339         } else {
4340                 len = name_len;
4341         }
4342
4343         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4344
4345         /* Check root dir ref name */
4346         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4347                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4348                       root->objectid, ref_key->objectid, ref_key->offset,
4349                       namebuf);
4350                 err |= ROOT_DIR_ERROR;
4351         }
4352
4353         /* Find related DIR_INDEX */
4354         key.objectid = ref_key->offset;
4355         key.type = BTRFS_DIR_INDEX_KEY;
4356         key.offset = index;
4357         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4358         err |= ret;
4359
4360         /* Find related dir_item */
4361         key.objectid = ref_key->offset;
4362         key.type = BTRFS_DIR_ITEM_KEY;
4363         key.offset = btrfs_name_hash(namebuf, len);
4364         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4365         err |= ret;
4366
4367         len = sizeof(*ref) + name_len;
4368         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4369         cur += len;
4370         if (cur < total)
4371                 goto next;
4372
4373 out:
4374         return err;
4375 }
4376
4377 /*
4378  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4379  * DIR_ITEM/DIR_INDEX.
4380  *
4381  * @root:       the root of the fs/file tree
4382  * @ref_key:    the key of the INODE_EXTREF
4383  * @refs:       the count of INODE_EXTREF
4384  * @mode:       the st_mode of INODE_ITEM
4385  *
4386  * Return 0 if no error occurred.
4387  */
4388 static int check_inode_extref(struct btrfs_root *root,
4389                               struct btrfs_key *ref_key,
4390                               struct extent_buffer *node, int slot, u64 *refs,
4391                               int mode)
4392 {
4393         struct btrfs_key key;
4394         struct btrfs_inode_extref *extref;
4395         char namebuf[BTRFS_NAME_LEN] = {0};
4396         u32 total;
4397         u32 cur = 0;
4398         u32 len;
4399         u32 name_len;
4400         u64 index;
4401         u64 parent;
4402         int ret;
4403         int err = 0;
4404
4405         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4406         total = btrfs_item_size_nr(node, slot);
4407
4408 next:
4409         /* update inode ref count */
4410         (*refs)++;
4411         name_len = btrfs_inode_extref_name_len(node, extref);
4412         index = btrfs_inode_extref_index(node, extref);
4413         parent = btrfs_inode_extref_parent(node, extref);
4414         if (name_len <= BTRFS_NAME_LEN) {
4415                 len = name_len;
4416         } else {
4417                 len = BTRFS_NAME_LEN;
4418                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4419                         root->objectid, ref_key->objectid, ref_key->offset);
4420         }
4421         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4422
4423         /* Check root dir ref name */
4424         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4425                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4426                       root->objectid, ref_key->objectid, ref_key->offset,
4427                       namebuf);
4428                 err |= ROOT_DIR_ERROR;
4429         }
4430
4431         /* find related dir_index */
4432         key.objectid = parent;
4433         key.type = BTRFS_DIR_INDEX_KEY;
4434         key.offset = index;
4435         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4436         err |= ret;
4437
4438         /* find related dir_item */
4439         key.objectid = parent;
4440         key.type = BTRFS_DIR_ITEM_KEY;
4441         key.offset = btrfs_name_hash(namebuf, len);
4442         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4443         err |= ret;
4444
4445         len = sizeof(*extref) + name_len;
4446         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4447         cur += len;
4448
4449         if (cur < total)
4450                 goto next;
4451
4452         return err;
4453 }
4454
4455 /*
4456  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4457  * DIR_ITEM/DIR_INDEX match.
4458  *
4459  * @root:       the root of the fs/file tree
4460  * @key:        the key of the INODE_REF/INODE_EXTREF
4461  * @name:       the name in the INODE_REF/INODE_EXTREF
4462  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4463  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4464  * to (u64)-1
4465  * @ext_ref:    the EXTENDED_IREF feature
4466  *
4467  * Return 0 if no error occurred.
4468  * Return >0 for error bitmap
4469  */
4470 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4471                           char *name, int namelen, u64 index,
4472                           unsigned int ext_ref)
4473 {
4474         struct btrfs_path path;
4475         struct btrfs_inode_ref *ref;
4476         struct btrfs_inode_extref *extref;
4477         struct extent_buffer *node;
4478         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4479         u32 total;
4480         u32 cur = 0;
4481         u32 len;
4482         u32 ref_namelen;
4483         u64 ref_index;
4484         u64 parent;
4485         u64 dir_id;
4486         int slot;
4487         int ret;
4488
4489         btrfs_init_path(&path);
4490         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4491         if (ret) {
4492                 ret = INODE_REF_MISSING;
4493                 goto extref;
4494         }
4495
4496         node = path.nodes[0];
4497         slot = path.slots[0];
4498
4499         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4500         total = btrfs_item_size_nr(node, slot);
4501
4502         /* Iterate all entry of INODE_REF */
4503         while (cur < total) {
4504                 ret = INODE_REF_MISSING;
4505
4506                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4507                 ref_index = btrfs_inode_ref_index(node, ref);
4508                 if (index != (u64)-1 && index != ref_index)
4509                         goto next_ref;
4510
4511                 if (cur + sizeof(*ref) + ref_namelen > total ||
4512                     ref_namelen > BTRFS_NAME_LEN) {
4513                         warning("root %llu INODE %s[%llu %llu] name too long",
4514                                 root->objectid,
4515                                 key->type == BTRFS_INODE_REF_KEY ?
4516                                         "REF" : "EXTREF",
4517                                 key->objectid, key->offset);
4518
4519                         if (cur + sizeof(*ref) > total)
4520                                 break;
4521                         len = min_t(u32, total - cur - sizeof(*ref),
4522                                     BTRFS_NAME_LEN);
4523                 } else {
4524                         len = ref_namelen;
4525                 }
4526
4527                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4528                                    len);
4529
4530                 if (len != namelen || strncmp(ref_namebuf, name, len))
4531                         goto next_ref;
4532
4533                 ret = 0;
4534                 goto out;
4535 next_ref:
4536                 len = sizeof(*ref) + ref_namelen;
4537                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4538                 cur += len;
4539         }
4540
4541 extref:
4542         /* Skip if not support EXTENDED_IREF feature */
4543         if (!ext_ref)
4544                 goto out;
4545
4546         btrfs_release_path(&path);
4547         btrfs_init_path(&path);
4548
4549         dir_id = key->offset;
4550         key->type = BTRFS_INODE_EXTREF_KEY;
4551         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4552
4553         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4554         if (ret) {
4555                 ret = INODE_REF_MISSING;
4556                 goto out;
4557         }
4558
4559         node = path.nodes[0];
4560         slot = path.slots[0];
4561
4562         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4563         cur = 0;
4564         total = btrfs_item_size_nr(node, slot);
4565
4566         /* Iterate all entry of INODE_EXTREF */
4567         while (cur < total) {
4568                 ret = INODE_REF_MISSING;
4569
4570                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4571                 ref_index = btrfs_inode_extref_index(node, extref);
4572                 parent = btrfs_inode_extref_parent(node, extref);
4573                 if (index != (u64)-1 && index != ref_index)
4574                         goto next_extref;
4575
4576                 if (parent != dir_id)
4577                         goto next_extref;
4578
4579                 if (ref_namelen <= BTRFS_NAME_LEN) {
4580                         len = ref_namelen;
4581                 } else {
4582                         len = BTRFS_NAME_LEN;
4583                         warning("root %llu INODE %s[%llu %llu] name too long",
4584                                 root->objectid,
4585                                 key->type == BTRFS_INODE_REF_KEY ?
4586                                         "REF" : "EXTREF",
4587                                 key->objectid, key->offset);
4588                 }
4589                 read_extent_buffer(node, ref_namebuf,
4590                                    (unsigned long)(extref + 1), len);
4591
4592                 if (len != namelen || strncmp(ref_namebuf, name, len))
4593                         goto next_extref;
4594
4595                 ret = 0;
4596                 goto out;
4597
4598 next_extref:
4599                 len = sizeof(*extref) + ref_namelen;
4600                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4601                 cur += len;
4602
4603         }
4604 out:
4605         btrfs_release_path(&path);
4606         return ret;
4607 }
4608
4609 /*
4610  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4611  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4612  *
4613  * @root:       the root of the fs/file tree
 * @key:        the key of the DIR_ITEM/DIR_INDEX
4615  * @size:       the st_size of the INODE_ITEM
4616  * @ext_ref:    the EXTENDED_IREF feature
4617  *
4618  * Return 0 if no error occurred.
4619  */
4620 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4621                           struct extent_buffer *node, int slot, u64 *size,
4622                           unsigned int ext_ref)
4623 {
4624         struct btrfs_dir_item *di;
4625         struct btrfs_inode_item *ii;
4626         struct btrfs_path path;
4627         struct btrfs_key location;
4628         char namebuf[BTRFS_NAME_LEN] = {0};
4629         u32 total;
4630         u32 cur = 0;
4631         u32 len;
4632         u32 name_len;
4633         u32 data_len;
4634         u8 filetype;
4635         u32 mode;
4636         u64 index;
4637         int ret;
4638         int err = 0;
4639
4640         /*
4641          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4642          * ignore index check.
4643          */
4644         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4645
4646         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4647         total = btrfs_item_size_nr(node, slot);
4648
4649         while (cur < total) {
4650                 data_len = btrfs_dir_data_len(node, di);
4651                 if (data_len)
4652                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4653                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4654                               "DIR_ITEM" : "DIR_INDEX",
4655                               key->objectid, key->offset, data_len);
4656
4657                 name_len = btrfs_dir_name_len(node, di);
4658                 if (cur + sizeof(*di) + name_len > total ||
4659                     name_len > BTRFS_NAME_LEN) {
4660                         warning("root %llu %s[%llu %llu] name too long",
4661                                 root->objectid,
4662                                 key->type == BTRFS_DIR_ITEM_KEY ?
4663                                 "DIR_ITEM" : "DIR_INDEX",
4664                                 key->objectid, key->offset);
4665
4666                         if (cur + sizeof(*di) > total)
4667                                 break;
4668                         len = min_t(u32, total - cur - sizeof(*di),
4669                                     BTRFS_NAME_LEN);
4670                 } else {
4671                         len = name_len;
4672                 }
4673                 (*size) += name_len;
4674
4675                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4676                 filetype = btrfs_dir_type(node, di);
4677
4678                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4679                     key->offset != btrfs_name_hash(namebuf, len)) {
4680                         err |= -EIO;
4681                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4682                                 root->objectid, key->objectid, key->offset,
4683                                 namebuf, len, filetype, key->offset,
4684                                 btrfs_name_hash(namebuf, len));
4685                 }
4686
4687                 btrfs_init_path(&path);
4688                 btrfs_dir_item_key_to_cpu(node, di, &location);
4689
4690                 /* Ignore related ROOT_ITEM check */
4691                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4692                         goto next;
4693
4694                 /* Check relative INODE_ITEM(existence/filetype) */
4695                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4696                 if (ret) {
4697                         err |= INODE_ITEM_MISSING;
4698                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4699                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4700                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4701                               key->offset, location.objectid, name_len,
4702                               namebuf, filetype);
4703                         goto next;
4704                 }
4705
4706                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4707                                     struct btrfs_inode_item);
4708                 mode = btrfs_inode_mode(path.nodes[0], ii);
4709
4710                 if (imode_to_type(mode) != filetype) {
4711                         err |= INODE_ITEM_MISMATCH;
4712                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4713                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4714                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4715                               key->offset, name_len, namebuf, filetype);
4716                 }
4717
4718                 /* Check relative INODE_REF/INODE_EXTREF */
4719                 location.type = BTRFS_INODE_REF_KEY;
4720                 location.offset = key->objectid;
4721                 ret = find_inode_ref(root, &location, namebuf, len,
4722                                        index, ext_ref);
4723                 err |= ret;
4724                 if (ret & INODE_REF_MISSING)
4725                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4726                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4727                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4728                               key->offset, name_len, namebuf, filetype);
4729
4730 next:
4731                 btrfs_release_path(&path);
4732                 len = sizeof(*di) + name_len + data_len;
4733                 di = (struct btrfs_dir_item *)((char *)di + len);
4734                 cur += len;
4735
4736                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4737                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4738                               root->objectid, key->objectid, key->offset);
4739                         break;
4740                 }
4741         }
4742
4743         return err;
4744 }
4745
4746 /*
4747  * Check file extent datasum/hole, update the size of the file extents,
4748  * check and update the last offset of the file extent.
4749  *
4750  * @root:       the root of fs/file tree.
4751  * @fkey:       the key of the file extent.
4752  * @nodatasum:  INODE_NODATASUM feature.
4753  * @size:       the sum of all EXTENT_DATA items size for this inode.
4754  * @end:        the offset of the last extent.
4755  *
4756  * Return 0 if no error occurred.
4757  */
4758 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4759                              struct extent_buffer *node, int slot,
4760                              unsigned int nodatasum, u64 *size, u64 *end)
4761 {
4762         struct btrfs_file_extent_item *fi;
4763         u64 disk_bytenr;
4764         u64 disk_num_bytes;
4765         u64 extent_num_bytes;
4766         u64 extent_offset;
4767         u64 csum_found;         /* In byte size, sectorsize aligned */
4768         u64 search_start;       /* Logical range start we search for csum */
4769         u64 search_len;         /* Logical range len we search for csum */
4770         unsigned int extent_type;
4771         unsigned int is_hole;
4772         int compressed = 0;
4773         int ret;
4774         int err = 0;
4775
4776         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4777
4778         /* Check inline extent */
4779         extent_type = btrfs_file_extent_type(node, fi);
4780         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4781                 struct btrfs_item *e = btrfs_item_nr(slot);
4782                 u32 item_inline_len;
4783
4784                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4785                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4786                 compressed = btrfs_file_extent_compression(node, fi);
4787                 if (extent_num_bytes == 0) {
4788                         error(
4789                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4790                                 root->objectid, fkey->objectid, fkey->offset);
4791                         err |= FILE_EXTENT_ERROR;
4792                 }
4793                 if (!compressed && extent_num_bytes != item_inline_len) {
4794                         error(
4795                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4796                                 root->objectid, fkey->objectid, fkey->offset,
4797                                 extent_num_bytes, item_inline_len);
4798                         err |= FILE_EXTENT_ERROR;
4799                 }
4800                 *end += extent_num_bytes;
4801                 *size += extent_num_bytes;
4802                 return err;
4803         }
4804
4805         /* Check extent type */
4806         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4807                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4808                 err |= FILE_EXTENT_ERROR;
4809                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4810                       root->objectid, fkey->objectid, fkey->offset);
4811                 return err;
4812         }
4813
4814         /* Check REG_EXTENT/PREALLOC_EXTENT */
4815         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4816         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4817         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4818         extent_offset = btrfs_file_extent_offset(node, fi);
4819         compressed = btrfs_file_extent_compression(node, fi);
4820         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4821
4822         /*
4823          * Check EXTENT_DATA csum
4824          *
4825          * For plain (uncompressed) extent, we should only check the range
4826          * we're referring to, as it's possible that part of prealloc extent
4827          * has been written, and has csum:
4828          *
4829          * |<--- Original large preallocated extent A ---->|
4830          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4831          *      No csum                         Has csum
4832          *
4833          * For compressed extent, we should check the whole range.
4834          */
4835         if (!compressed) {
4836                 search_start = disk_bytenr + extent_offset;
4837                 search_len = extent_num_bytes;
4838         } else {
4839                 search_start = disk_bytenr;
4840                 search_len = disk_num_bytes;
4841         }
4842         ret = count_csum_range(root, search_start, search_len, &csum_found);
4843         if (csum_found > 0 && nodatasum) {
4844                 err |= ODD_CSUM_ITEM;
4845                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4846                       root->objectid, fkey->objectid, fkey->offset);
4847         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4848                    !is_hole && (ret < 0 || csum_found < search_len)) {
4849                 err |= CSUM_ITEM_MISSING;
4850                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4851                       root->objectid, fkey->objectid, fkey->offset,
4852                       csum_found, search_len);
4853         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4854                 err |= ODD_CSUM_ITEM;
4855                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4856                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4857         }
4858
4859         /* Check EXTENT_DATA hole */
4860         if (!no_holes && *end != fkey->offset) {
4861                 err |= FILE_EXTENT_ERROR;
4862                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4863                       root->objectid, fkey->objectid, fkey->offset);
4864         }
4865
4866         *end += extent_num_bytes;
4867         if (!is_hole)
4868                 *size += extent_num_bytes;
4869
4870         return err;
4871 }
4872
4873 /*
4874  * Check INODE_ITEM and related ITEMs (the same inode number)
4875  * 1. check link count
4876  * 2. check inode ref/extref
4877  * 3. check dir item/index
4878  *
4879  * @ext_ref:    the EXTENDED_IREF feature
4880  *
4881  * Return 0 if no error occurred.
4882  * Return >0 for error or hit the traversal is done(by error bitmap)
4883  */
4884 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4885                             unsigned int ext_ref)
4886 {
4887         struct extent_buffer *node;
4888         struct btrfs_inode_item *ii;
4889         struct btrfs_key key;
4890         u64 inode_id;
4891         u32 mode;
4892         u64 nlink;
4893         u64 nbytes;
4894         u64 isize;
4895         u64 size = 0;
4896         u64 refs = 0;
4897         u64 extent_end = 0;
4898         u64 extent_size = 0;
4899         unsigned int dir;
4900         unsigned int nodatasum;
4901         int slot;
4902         int ret;
4903         int err = 0;
4904
4905         node = path->nodes[0];
4906         slot = path->slots[0];
4907
4908         btrfs_item_key_to_cpu(node, &key, slot);
4909         inode_id = key.objectid;
4910
4911         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4912                 ret = btrfs_next_item(root, path);
4913                 if (ret > 0)
4914                         err |= LAST_ITEM;
4915                 return err;
4916         }
4917
4918         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4919         isize = btrfs_inode_size(node, ii);
4920         nbytes = btrfs_inode_nbytes(node, ii);
4921         mode = btrfs_inode_mode(node, ii);
4922         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4923         nlink = btrfs_inode_nlink(node, ii);
4924         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4925
4926         while (1) {
4927                 ret = btrfs_next_item(root, path);
4928                 if (ret < 0) {
4929                         /* out will fill 'err' rusing current statistics */
4930                         goto out;
4931                 } else if (ret > 0) {
4932                         err |= LAST_ITEM;
4933                         goto out;
4934                 }
4935
4936                 node = path->nodes[0];
4937                 slot = path->slots[0];
4938                 btrfs_item_key_to_cpu(node, &key, slot);
4939                 if (key.objectid != inode_id)
4940                         goto out;
4941
4942                 switch (key.type) {
4943                 case BTRFS_INODE_REF_KEY:
4944                         ret = check_inode_ref(root, &key, node, slot, &refs,
4945                                               mode);
4946                         err |= ret;
4947                         break;
4948                 case BTRFS_INODE_EXTREF_KEY:
4949                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4950                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4951                                         root->objectid, key.objectid,
4952                                         key.offset);
4953                         ret = check_inode_extref(root, &key, node, slot, &refs,
4954                                                  mode);
4955                         err |= ret;
4956                         break;
4957                 case BTRFS_DIR_ITEM_KEY:
4958                 case BTRFS_DIR_INDEX_KEY:
4959                         if (!dir) {
4960                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4961                                         root->objectid, inode_id,
4962                                         imode_to_type(mode), key.objectid,
4963                                         key.offset);
4964                         }
4965                         ret = check_dir_item(root, &key, node, slot, &size,
4966                                              ext_ref);
4967                         err |= ret;
4968                         break;
4969                 case BTRFS_EXTENT_DATA_KEY:
4970                         if (dir) {
4971                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4972                                         root->objectid, inode_id, key.objectid,
4973                                         key.offset);
4974                         }
4975                         ret = check_file_extent(root, &key, node, slot,
4976                                                 nodatasum, &extent_size,
4977                                                 &extent_end);
4978                         err |= ret;
4979                         break;
4980                 case BTRFS_XATTR_ITEM_KEY:
4981                         break;
4982                 default:
4983                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4984                               key.objectid, key.type, key.offset);
4985                 }
4986         }
4987
4988 out:
4989         /* verify INODE_ITEM nlink/isize/nbytes */
4990         if (dir) {
4991                 if (nlink != 1) {
4992                         err |= LINK_COUNT_ERROR;
4993                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4994                               root->objectid, inode_id, nlink);
4995                 }
4996
4997                 /*
4998                  * Just a warning, as dir inode nbytes is just an
4999                  * instructive value.
5000                  */
5001                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5002                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5003                                 root->objectid, inode_id,
5004                                 root->fs_info->nodesize);
5005                 }
5006
5007                 if (isize != size) {
5008                         err |= ISIZE_ERROR;
5009                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5010                               root->objectid, inode_id, isize, size);
5011                 }
5012         } else {
5013                 if (nlink != refs) {
5014                         err |= LINK_COUNT_ERROR;
5015                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5016                               root->objectid, inode_id, nlink, refs);
5017                 } else if (!nlink) {
5018                         err |= ORPHAN_ITEM;
5019                 }
5020
5021                 if (!nbytes && !no_holes && extent_end < isize) {
5022                         err |= NBYTES_ERROR;
5023                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5024                               root->objectid, inode_id, isize);
5025                 }
5026
5027                 if (nbytes != extent_size) {
5028                         err |= NBYTES_ERROR;
5029                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5030                               root->objectid, inode_id, nbytes, extent_size);
5031                 }
5032         }
5033
5034         return err;
5035 }
5036
5037 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5038 {
5039         struct btrfs_path path;
5040         struct btrfs_key key;
5041         int err = 0;
5042         int ret;
5043
5044         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5045         key.type = BTRFS_INODE_ITEM_KEY;
5046         key.offset = 0;
5047
5048         /* For root being dropped, we don't need to check first inode */
5049         if (btrfs_root_refs(&root->root_item) == 0 &&
5050             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5051             key.objectid)
5052                 return 0;
5053
5054         btrfs_init_path(&path);
5055
5056         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5057         if (ret < 0)
5058                 goto out;
5059         if (ret > 0) {
5060                 ret = 0;
5061                 err |= INODE_ITEM_MISSING;
5062                 error("first inode item of root %llu is missing",
5063                       root->objectid);
5064         }
5065
5066         err |= check_inode_item(root, &path, ext_ref);
5067         err &= ~LAST_ITEM;
5068         if (err && !ret)
5069                 ret = -EIO;
5070 out:
5071         btrfs_release_path(&path);
5072         return ret;
5073 }
5074
5075 /*
5076  * Iterate all item on the tree and call check_inode_item() to check.
5077  *
5078  * @root:       the root of the tree to be checked.
5079  * @ext_ref:    the EXTENDED_IREF feature
5080  *
5081  * Return 0 if no error found.
5082  * Return <0 for error.
5083  */
5084 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5085 {
5086         struct btrfs_path path;
5087         struct node_refs nrefs;
5088         struct btrfs_root_item *root_item = &root->root_item;
5089         int ret;
5090         int level;
5091         int err = 0;
5092
5093         /*
5094          * We need to manually check the first inode item(256)
5095          * As the following traversal function will only start from
5096          * the first inode item in the leaf, if inode item(256) is missing
5097          * we will just skip it forever.
5098          */
5099         ret = check_fs_first_inode(root, ext_ref);
5100         if (ret < 0)
5101                 return ret;
5102
5103         memset(&nrefs, 0, sizeof(nrefs));
5104         level = btrfs_header_level(root->node);
5105         btrfs_init_path(&path);
5106
5107         if (btrfs_root_refs(root_item) > 0 ||
5108             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5109                 path.nodes[level] = root->node;
5110                 path.slots[level] = 0;
5111                 extent_buffer_get(root->node);
5112         } else {
5113                 struct btrfs_key key;
5114
5115                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5116                 level = root_item->drop_level;
5117                 path.lowest_level = level;
5118                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5119                 if (ret < 0)
5120                         goto out;
5121                 ret = 0;
5122         }
5123
5124         while (1) {
5125                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5126                 err |= !!ret;
5127
5128                 /* if ret is negative, walk shall stop */
5129                 if (ret < 0) {
5130                         ret = err;
5131                         break;
5132                 }
5133
5134                 ret = walk_up_tree_v2(root, &path, &level);
5135                 if (ret != 0) {
5136                         /* Normal exit, reset ret to err */
5137                         ret = err;
5138                         break;
5139                 }
5140         }
5141
5142 out:
5143         btrfs_release_path(&path);
5144         return ret;
5145 }
5146
5147 /*
5148  * Find the relative ref for root_ref and root_backref.
5149  *
5150  * @root:       the root of the root tree.
5151  * @ref_key:    the key of the root ref.
5152  *
5153  * Return 0 if no error occurred.
5154  */
5155 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5156                           struct extent_buffer *node, int slot)
5157 {
5158         struct btrfs_path path;
5159         struct btrfs_key key;
5160         struct btrfs_root_ref *ref;
5161         struct btrfs_root_ref *backref;
5162         char ref_name[BTRFS_NAME_LEN] = {0};
5163         char backref_name[BTRFS_NAME_LEN] = {0};
5164         u64 ref_dirid;
5165         u64 ref_seq;
5166         u32 ref_namelen;
5167         u64 backref_dirid;
5168         u64 backref_seq;
5169         u32 backref_namelen;
5170         u32 len;
5171         int ret;
5172         int err = 0;
5173
5174         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5175         ref_dirid = btrfs_root_ref_dirid(node, ref);
5176         ref_seq = btrfs_root_ref_sequence(node, ref);
5177         ref_namelen = btrfs_root_ref_name_len(node, ref);
5178
5179         if (ref_namelen <= BTRFS_NAME_LEN) {
5180                 len = ref_namelen;
5181         } else {
5182                 len = BTRFS_NAME_LEN;
5183                 warning("%s[%llu %llu] ref_name too long",
5184                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5185                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5186                         ref_key->offset);
5187         }
5188         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5189
5190         /* Find relative root_ref */
5191         key.objectid = ref_key->offset;
5192         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5193         key.offset = ref_key->objectid;
5194
5195         btrfs_init_path(&path);
5196         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5197         if (ret) {
5198                 err |= ROOT_REF_MISSING;
5199                 error("%s[%llu %llu] couldn't find relative ref",
5200                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5201                       "ROOT_REF" : "ROOT_BACKREF",
5202                       ref_key->objectid, ref_key->offset);
5203                 goto out;
5204         }
5205
5206         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5207                                  struct btrfs_root_ref);
5208         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5209         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5210         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5211
5212         if (backref_namelen <= BTRFS_NAME_LEN) {
5213                 len = backref_namelen;
5214         } else {
5215                 len = BTRFS_NAME_LEN;
5216                 warning("%s[%llu %llu] ref_name too long",
5217                         key.type == BTRFS_ROOT_REF_KEY ?
5218                         "ROOT_REF" : "ROOT_BACKREF",
5219                         key.objectid, key.offset);
5220         }
5221         read_extent_buffer(path.nodes[0], backref_name,
5222                            (unsigned long)(backref + 1), len);
5223
5224         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5225             ref_namelen != backref_namelen ||
5226             strncmp(ref_name, backref_name, len)) {
5227                 err |= ROOT_REF_MISMATCH;
5228                 error("%s[%llu %llu] mismatch relative ref",
5229                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5230                       "ROOT_REF" : "ROOT_BACKREF",
5231                       ref_key->objectid, ref_key->offset);
5232         }
5233 out:
5234         btrfs_release_path(&path);
5235         return err;
5236 }
5237
5238 /*
5239  * Check all fs/file tree in low_memory mode.
5240  *
5241  * 1. for fs tree root item, call check_fs_root_v2()
5242  * 2. for fs tree root ref/backref, call check_root_ref()
5243  *
5244  * Return 0 if no error occurred.
5245  */
5246 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5247 {
5248         struct btrfs_root *tree_root = fs_info->tree_root;
5249         struct btrfs_root *cur_root = NULL;
5250         struct btrfs_path path;
5251         struct btrfs_key key;
5252         struct extent_buffer *node;
5253         unsigned int ext_ref;
5254         int slot;
5255         int ret;
5256         int err = 0;
5257
5258         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5259
5260         btrfs_init_path(&path);
5261         key.objectid = BTRFS_FS_TREE_OBJECTID;
5262         key.offset = 0;
5263         key.type = BTRFS_ROOT_ITEM_KEY;
5264
5265         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5266         if (ret < 0) {
5267                 err = ret;
5268                 goto out;
5269         } else if (ret > 0) {
5270                 err = -ENOENT;
5271                 goto out;
5272         }
5273
5274         while (1) {
5275                 node = path.nodes[0];
5276                 slot = path.slots[0];
5277                 btrfs_item_key_to_cpu(node, &key, slot);
5278                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5279                         goto out;
5280                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5281                     fs_root_objectid(key.objectid)) {
5282                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5283                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5284                                                                        &key);
5285                         } else {
5286                                 key.offset = (u64)-1;
5287                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5288                         }
5289
5290                         if (IS_ERR(cur_root)) {
5291                                 error("Fail to read fs/subvol tree: %lld",
5292                                       key.objectid);
5293                                 err = -EIO;
5294                                 goto next;
5295                         }
5296
5297                         ret = check_fs_root_v2(cur_root, ext_ref);
5298                         err |= ret;
5299
5300                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5301                                 btrfs_free_fs_root(cur_root);
5302                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5303                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5304                         ret = check_root_ref(tree_root, &key, node, slot);
5305                         err |= ret;
5306                 }
5307 next:
5308                 ret = btrfs_next_item(tree_root, &path);
5309                 if (ret > 0)
5310                         goto out;
5311                 if (ret < 0) {
5312                         err = ret;
5313                         goto out;
5314                 }
5315         }
5316
5317 out:
5318         btrfs_release_path(&path);
5319         return err;
5320 }
5321
5322 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5323 {
5324         struct list_head *cur = rec->backrefs.next;
5325         struct extent_backref *back;
5326         struct tree_backref *tback;
5327         struct data_backref *dback;
5328         u64 found = 0;
5329         int err = 0;
5330
5331         while(cur != &rec->backrefs) {
5332                 back = to_extent_backref(cur);
5333                 cur = cur->next;
5334                 if (!back->found_extent_tree) {
5335                         err = 1;
5336                         if (!print_errs)
5337                                 goto out;
5338                         if (back->is_data) {
5339                                 dback = to_data_backref(back);
5340                                 fprintf(stderr, "Backref %llu %s %llu"
5341                                         " owner %llu offset %llu num_refs %lu"
5342                                         " not found in extent tree\n",
5343                                         (unsigned long long)rec->start,
5344                                         back->full_backref ?
5345                                         "parent" : "root",
5346                                         back->full_backref ?
5347                                         (unsigned long long)dback->parent:
5348                                         (unsigned long long)dback->root,
5349                                         (unsigned long long)dback->owner,
5350                                         (unsigned long long)dback->offset,
5351                                         (unsigned long)dback->num_refs);
5352                         } else {
5353                                 tback = to_tree_backref(back);
5354                                 fprintf(stderr, "Backref %llu parent %llu"
5355                                         " root %llu not found in extent tree\n",
5356                                         (unsigned long long)rec->start,
5357                                         (unsigned long long)tback->parent,
5358                                         (unsigned long long)tback->root);
5359                         }
5360                 }
5361                 if (!back->is_data && !back->found_ref) {
5362                         err = 1;
5363                         if (!print_errs)
5364                                 goto out;
5365                         tback = to_tree_backref(back);
5366                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5367                                 (unsigned long long)rec->start,
5368                                 back->full_backref ? "parent" : "root",
5369                                 back->full_backref ?
5370                                 (unsigned long long)tback->parent :
5371                                 (unsigned long long)tback->root, back);
5372                 }
5373                 if (back->is_data) {
5374                         dback = to_data_backref(back);
5375                         if (dback->found_ref != dback->num_refs) {
5376                                 err = 1;
5377                                 if (!print_errs)
5378                                         goto out;
5379                                 fprintf(stderr, "Incorrect local backref count"
5380                                         " on %llu %s %llu owner %llu"
5381                                         " offset %llu found %u wanted %u back %p\n",
5382                                         (unsigned long long)rec->start,
5383                                         back->full_backref ?
5384                                         "parent" : "root",
5385                                         back->full_backref ?
5386                                         (unsigned long long)dback->parent:
5387                                         (unsigned long long)dback->root,
5388                                         (unsigned long long)dback->owner,
5389                                         (unsigned long long)dback->offset,
5390                                         dback->found_ref, dback->num_refs, back);
5391                         }
5392                         if (dback->disk_bytenr != rec->start) {
5393                                 err = 1;
5394                                 if (!print_errs)
5395                                         goto out;
5396                                 fprintf(stderr, "Backref disk bytenr does not"
5397                                         " match extent record, bytenr=%llu, "
5398                                         "ref bytenr=%llu\n",
5399                                         (unsigned long long)rec->start,
5400                                         (unsigned long long)dback->disk_bytenr);
5401                         }
5402
5403                         if (dback->bytes != rec->nr) {
5404                                 err = 1;
5405                                 if (!print_errs)
5406                                         goto out;
5407                                 fprintf(stderr, "Backref bytes do not match "
5408                                         "extent backref, bytenr=%llu, ref "
5409                                         "bytes=%llu, backref bytes=%llu\n",
5410                                         (unsigned long long)rec->start,
5411                                         (unsigned long long)rec->nr,
5412                                         (unsigned long long)dback->bytes);
5413                         }
5414                 }
5415                 if (!back->is_data) {
5416                         found += 1;
5417                 } else {
5418                         dback = to_data_backref(back);
5419                         found += dback->found_ref;
5420                 }
5421         }
5422         if (found != rec->refs) {
5423                 err = 1;
5424                 if (!print_errs)
5425                         goto out;
5426                 fprintf(stderr, "Incorrect global backref count "
5427                         "on %llu found %llu wanted %llu\n",
5428                         (unsigned long long)rec->start,
5429                         (unsigned long long)found,
5430                         (unsigned long long)rec->refs);
5431         }
5432 out:
5433         return err;
5434 }
5435
5436 static int free_all_extent_backrefs(struct extent_record *rec)
5437 {
5438         struct extent_backref *back;
5439         struct list_head *cur;
5440         while (!list_empty(&rec->backrefs)) {
5441                 cur = rec->backrefs.next;
5442                 back = to_extent_backref(cur);
5443                 list_del(cur);
5444                 free(back);
5445         }
5446         return 0;
5447 }
5448
5449 static void free_extent_record_cache(struct cache_tree *extent_cache)
5450 {
5451         struct cache_extent *cache;
5452         struct extent_record *rec;
5453
5454         while (1) {
5455                 cache = first_cache_extent(extent_cache);
5456                 if (!cache)
5457                         break;
5458                 rec = container_of(cache, struct extent_record, cache);
5459                 remove_cache_extent(extent_cache, cache);
5460                 free_all_extent_backrefs(rec);
5461                 free(rec);
5462         }
5463 }
5464
5465 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5466                                  struct extent_record *rec)
5467 {
5468         if (rec->content_checked && rec->owner_ref_checked &&
5469             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5470             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5471             !rec->bad_full_backref && !rec->crossing_stripes &&
5472             !rec->wrong_chunk_type) {
5473                 remove_cache_extent(extent_cache, &rec->cache);
5474                 free_all_extent_backrefs(rec);
5475                 list_del_init(&rec->list);
5476                 free(rec);
5477         }
5478         return 0;
5479 }
5480
5481 static int check_owner_ref(struct btrfs_root *root,
5482                             struct extent_record *rec,
5483                             struct extent_buffer *buf)
5484 {
5485         struct extent_backref *node;
5486         struct tree_backref *back;
5487         struct btrfs_root *ref_root;
5488         struct btrfs_key key;
5489         struct btrfs_path path;
5490         struct extent_buffer *parent;
5491         int level;
5492         int found = 0;
5493         int ret;
5494
5495         list_for_each_entry(node, &rec->backrefs, list) {
5496                 if (node->is_data)
5497                         continue;
5498                 if (!node->found_ref)
5499                         continue;
5500                 if (node->full_backref)
5501                         continue;
5502                 back = to_tree_backref(node);
5503                 if (btrfs_header_owner(buf) == back->root)
5504                         return 0;
5505         }
5506         BUG_ON(rec->is_root);
5507
5508         /* try to find the block by search corresponding fs tree */
5509         key.objectid = btrfs_header_owner(buf);
5510         key.type = BTRFS_ROOT_ITEM_KEY;
5511         key.offset = (u64)-1;
5512
5513         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5514         if (IS_ERR(ref_root))
5515                 return 1;
5516
5517         level = btrfs_header_level(buf);
5518         if (level == 0)
5519                 btrfs_item_key_to_cpu(buf, &key, 0);
5520         else
5521                 btrfs_node_key_to_cpu(buf, &key, 0);
5522
5523         btrfs_init_path(&path);
5524         path.lowest_level = level + 1;
5525         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5526         if (ret < 0)
5527                 return 0;
5528
5529         parent = path.nodes[level + 1];
5530         if (parent && buf->start == btrfs_node_blockptr(parent,
5531                                                         path.slots[level + 1]))
5532                 found = 1;
5533
5534         btrfs_release_path(&path);
5535         return found ? 0 : 1;
5536 }
5537
5538 static int is_extent_tree_record(struct extent_record *rec)
5539 {
5540         struct list_head *cur = rec->backrefs.next;
5541         struct extent_backref *node;
5542         struct tree_backref *back;
5543         int is_extent = 0;
5544
5545         while(cur != &rec->backrefs) {
5546                 node = to_extent_backref(cur);
5547                 cur = cur->next;
5548                 if (node->is_data)
5549                         return 0;
5550                 back = to_tree_backref(node);
5551                 if (node->full_backref)
5552                         return 0;
5553                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5554                         is_extent = 1;
5555         }
5556         return is_extent;
5557 }
5558
5559
5560 static int record_bad_block_io(struct btrfs_fs_info *info,
5561                                struct cache_tree *extent_cache,
5562                                u64 start, u64 len)
5563 {
5564         struct extent_record *rec;
5565         struct cache_extent *cache;
5566         struct btrfs_key key;
5567
5568         cache = lookup_cache_extent(extent_cache, start, len);
5569         if (!cache)
5570                 return 0;
5571
5572         rec = container_of(cache, struct extent_record, cache);
5573         if (!is_extent_tree_record(rec))
5574                 return 0;
5575
5576         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5577         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5578 }
5579
5580 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5581                        struct extent_buffer *buf, int slot)
5582 {
5583         if (btrfs_header_level(buf)) {
5584                 struct btrfs_key_ptr ptr1, ptr2;
5585
5586                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5587                                    sizeof(struct btrfs_key_ptr));
5588                 read_extent_buffer(buf, &ptr2,
5589                                    btrfs_node_key_ptr_offset(slot + 1),
5590                                    sizeof(struct btrfs_key_ptr));
5591                 write_extent_buffer(buf, &ptr1,
5592                                     btrfs_node_key_ptr_offset(slot + 1),
5593                                     sizeof(struct btrfs_key_ptr));
5594                 write_extent_buffer(buf, &ptr2,
5595                                     btrfs_node_key_ptr_offset(slot),
5596                                     sizeof(struct btrfs_key_ptr));
5597                 if (slot == 0) {
5598                         struct btrfs_disk_key key;
5599                         btrfs_node_key(buf, &key, 0);
5600                         btrfs_fixup_low_keys(root, path, &key,
5601                                              btrfs_header_level(buf) + 1);
5602                 }
5603         } else {
5604                 struct btrfs_item *item1, *item2;
5605                 struct btrfs_key k1, k2;
5606                 char *item1_data, *item2_data;
5607                 u32 item1_offset, item2_offset, item1_size, item2_size;
5608
5609                 item1 = btrfs_item_nr(slot);
5610                 item2 = btrfs_item_nr(slot + 1);
5611                 btrfs_item_key_to_cpu(buf, &k1, slot);
5612                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5613                 item1_offset = btrfs_item_offset(buf, item1);
5614                 item2_offset = btrfs_item_offset(buf, item2);
5615                 item1_size = btrfs_item_size(buf, item1);
5616                 item2_size = btrfs_item_size(buf, item2);
5617
5618                 item1_data = malloc(item1_size);
5619                 if (!item1_data)
5620                         return -ENOMEM;
5621                 item2_data = malloc(item2_size);
5622                 if (!item2_data) {
5623                         free(item1_data);
5624                         return -ENOMEM;
5625                 }
5626
5627                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5628                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5629
5630                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5631                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5632                 free(item1_data);
5633                 free(item2_data);
5634
5635                 btrfs_set_item_offset(buf, item1, item2_offset);
5636                 btrfs_set_item_offset(buf, item2, item1_offset);
5637                 btrfs_set_item_size(buf, item1, item2_size);
5638                 btrfs_set_item_size(buf, item2, item1_size);
5639
5640                 path->slots[0] = slot;
5641                 btrfs_set_item_key_unsafe(root, path, &k2);
5642                 path->slots[0] = slot + 1;
5643                 btrfs_set_item_key_unsafe(root, path, &k1);
5644         }
5645         return 0;
5646 }
5647
5648 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5649 {
5650         struct extent_buffer *buf;
5651         struct btrfs_key k1, k2;
5652         int i;
5653         int level = path->lowest_level;
5654         int ret = -EIO;
5655
5656         buf = path->nodes[level];
5657         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5658                 if (level) {
5659                         btrfs_node_key_to_cpu(buf, &k1, i);
5660                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5661                 } else {
5662                         btrfs_item_key_to_cpu(buf, &k1, i);
5663                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5664                 }
5665                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5666                         continue;
5667                 ret = swap_values(root, path, buf, i);
5668                 if (ret)
5669                         break;
5670                 btrfs_mark_buffer_dirty(buf);
5671                 i = 0;
5672         }
5673         return ret;
5674 }
5675
5676 static int delete_bogus_item(struct btrfs_root *root,
5677                              struct btrfs_path *path,
5678                              struct extent_buffer *buf, int slot)
5679 {
5680         struct btrfs_key key;
5681         int nritems = btrfs_header_nritems(buf);
5682
5683         btrfs_item_key_to_cpu(buf, &key, slot);
5684
5685         /* These are all the keys we can deal with missing. */
5686         if (key.type != BTRFS_DIR_INDEX_KEY &&
5687             key.type != BTRFS_EXTENT_ITEM_KEY &&
5688             key.type != BTRFS_METADATA_ITEM_KEY &&
5689             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5690             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5691                 return -1;
5692
5693         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5694                (unsigned long long)key.objectid, key.type,
5695                (unsigned long long)key.offset, slot, buf->start);
5696         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5697                               btrfs_item_nr_offset(slot + 1),
5698                               sizeof(struct btrfs_item) *
5699                               (nritems - slot - 1));
5700         btrfs_set_header_nritems(buf, nritems - 1);
5701         if (slot == 0) {
5702                 struct btrfs_disk_key disk_key;
5703
5704                 btrfs_item_key(buf, &disk_key, 0);
5705                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5706         }
5707         btrfs_mark_buffer_dirty(buf);
5708         return 0;
5709 }
5710
5711 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5712 {
5713         struct extent_buffer *buf;
5714         int i;
5715         int ret = 0;
5716
5717         /* We should only get this for leaves */
5718         BUG_ON(path->lowest_level);
5719         buf = path->nodes[0];
5720 again:
5721         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5722                 unsigned int shift = 0, offset;
5723
5724                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5725                     BTRFS_LEAF_DATA_SIZE(root)) {
5726                         if (btrfs_item_end_nr(buf, i) >
5727                             BTRFS_LEAF_DATA_SIZE(root)) {
5728                                 ret = delete_bogus_item(root, path, buf, i);
5729                                 if (!ret)
5730                                         goto again;
5731                                 fprintf(stderr, "item is off the end of the "
5732                                         "leaf, can't fix\n");
5733                                 ret = -EIO;
5734                                 break;
5735                         }
5736                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5737                                 btrfs_item_end_nr(buf, i);
5738                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5739                            btrfs_item_offset_nr(buf, i - 1)) {
5740                         if (btrfs_item_end_nr(buf, i) >
5741                             btrfs_item_offset_nr(buf, i - 1)) {
5742                                 ret = delete_bogus_item(root, path, buf, i);
5743                                 if (!ret)
5744                                         goto again;
5745                                 fprintf(stderr, "items overlap, can't fix\n");
5746                                 ret = -EIO;
5747                                 break;
5748                         }
5749                         shift = btrfs_item_offset_nr(buf, i - 1) -
5750                                 btrfs_item_end_nr(buf, i);
5751                 }
5752                 if (!shift)
5753                         continue;
5754
5755                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5756                        i, shift, (unsigned long long)buf->start);
5757                 offset = btrfs_item_offset_nr(buf, i);
5758                 memmove_extent_buffer(buf,
5759                                       btrfs_leaf_data(buf) + offset + shift,
5760                                       btrfs_leaf_data(buf) + offset,
5761                                       btrfs_item_size_nr(buf, i));
5762                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5763                                       offset + shift);
5764                 btrfs_mark_buffer_dirty(buf);
5765         }
5766
5767         /*
5768          * We may have moved things, in which case we want to exit so we don't
5769          * write those changes out.  Once we have proper abort functionality in
5770          * progs this can be changed to something nicer.
5771          */
5772         BUG_ON(ret);
5773         return ret;
5774 }
5775
5776 /*
5777  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5778  * then just return -EIO.
5779  */
5780 static int try_to_fix_bad_block(struct btrfs_root *root,
5781                                 struct extent_buffer *buf,
5782                                 enum btrfs_tree_block_status status)
5783 {
5784         struct btrfs_trans_handle *trans;
5785         struct ulist *roots;
5786         struct ulist_node *node;
5787         struct btrfs_root *search_root;
5788         struct btrfs_path path;
5789         struct ulist_iterator iter;
5790         struct btrfs_key root_key, key;
5791         int ret;
5792
5793         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5794             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5795                 return -EIO;
5796
5797         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5798         if (ret)
5799                 return -EIO;
5800
5801         btrfs_init_path(&path);
5802         ULIST_ITER_INIT(&iter);
5803         while ((node = ulist_next(roots, &iter))) {
5804                 root_key.objectid = node->val;
5805                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5806                 root_key.offset = (u64)-1;
5807
5808                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5809                 if (IS_ERR(root)) {
5810                         ret = -EIO;
5811                         break;
5812                 }
5813
5814
5815                 trans = btrfs_start_transaction(search_root, 0);
5816                 if (IS_ERR(trans)) {
5817                         ret = PTR_ERR(trans);
5818                         break;
5819                 }
5820
5821                 path.lowest_level = btrfs_header_level(buf);
5822                 path.skip_check_block = 1;
5823                 if (path.lowest_level)
5824                         btrfs_node_key_to_cpu(buf, &key, 0);
5825                 else
5826                         btrfs_item_key_to_cpu(buf, &key, 0);
5827                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5828                 if (ret) {
5829                         ret = -EIO;
5830                         btrfs_commit_transaction(trans, search_root);
5831                         break;
5832                 }
5833                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5834                         ret = fix_key_order(search_root, &path);
5835                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5836                         ret = fix_item_offset(search_root, &path);
5837                 if (ret) {
5838                         btrfs_commit_transaction(trans, search_root);
5839                         break;
5840                 }
5841                 btrfs_release_path(&path);
5842                 btrfs_commit_transaction(trans, search_root);
5843         }
5844         ulist_free(roots);
5845         btrfs_release_path(&path);
5846         return ret;
5847 }
5848
5849 static int check_block(struct btrfs_root *root,
5850                        struct cache_tree *extent_cache,
5851                        struct extent_buffer *buf, u64 flags)
5852 {
5853         struct extent_record *rec;
5854         struct cache_extent *cache;
5855         struct btrfs_key key;
5856         enum btrfs_tree_block_status status;
5857         int ret = 0;
5858         int level;
5859
5860         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5861         if (!cache)
5862                 return 1;
5863         rec = container_of(cache, struct extent_record, cache);
5864         rec->generation = btrfs_header_generation(buf);
5865
5866         level = btrfs_header_level(buf);
5867         if (btrfs_header_nritems(buf) > 0) {
5868
5869                 if (level == 0)
5870                         btrfs_item_key_to_cpu(buf, &key, 0);
5871                 else
5872                         btrfs_node_key_to_cpu(buf, &key, 0);
5873
5874                 rec->info_objectid = key.objectid;
5875         }
5876         rec->info_level = level;
5877
5878         if (btrfs_is_leaf(buf))
5879                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5880         else
5881                 status = btrfs_check_node(root, &rec->parent_key, buf);
5882
5883         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5884                 if (repair)
5885                         status = try_to_fix_bad_block(root, buf, status);
5886                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5887                         ret = -EIO;
5888                         fprintf(stderr, "bad block %llu\n",
5889                                 (unsigned long long)buf->start);
5890                 } else {
5891                         /*
5892                          * Signal to callers we need to start the scan over
5893                          * again since we'll have cowed blocks.
5894                          */
5895                         ret = -EAGAIN;
5896                 }
5897         } else {
5898                 rec->content_checked = 1;
5899                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5900                         rec->owner_ref_checked = 1;
5901                 else {
5902                         ret = check_owner_ref(root, rec, buf);
5903                         if (!ret)
5904                                 rec->owner_ref_checked = 1;
5905                 }
5906         }
5907         if (!ret)
5908                 maybe_free_extent_rec(extent_cache, rec);
5909         return ret;
5910 }
5911
5912 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5913                                                 u64 parent, u64 root)
5914 {
5915         struct list_head *cur = rec->backrefs.next;
5916         struct extent_backref *node;
5917         struct tree_backref *back;
5918
5919         while(cur != &rec->backrefs) {
5920                 node = to_extent_backref(cur);
5921                 cur = cur->next;
5922                 if (node->is_data)
5923                         continue;
5924                 back = to_tree_backref(node);
5925                 if (parent > 0) {
5926                         if (!node->full_backref)
5927                                 continue;
5928                         if (parent == back->parent)
5929                                 return back;
5930                 } else {
5931                         if (node->full_backref)
5932                                 continue;
5933                         if (back->root == root)
5934                                 return back;
5935                 }
5936         }
5937         return NULL;
5938 }
5939
5940 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5941                                                 u64 parent, u64 root)
5942 {
5943         struct tree_backref *ref = malloc(sizeof(*ref));
5944
5945         if (!ref)
5946                 return NULL;
5947         memset(&ref->node, 0, sizeof(ref->node));
5948         if (parent > 0) {
5949                 ref->parent = parent;
5950                 ref->node.full_backref = 1;
5951         } else {
5952                 ref->root = root;
5953                 ref->node.full_backref = 0;
5954         }
5955         list_add_tail(&ref->node.list, &rec->backrefs);
5956
5957         return ref;
5958 }
5959
5960 static struct data_backref *find_data_backref(struct extent_record *rec,
5961                                                 u64 parent, u64 root,
5962                                                 u64 owner, u64 offset,
5963                                                 int found_ref,
5964                                                 u64 disk_bytenr, u64 bytes)
5965 {
5966         struct list_head *cur = rec->backrefs.next;
5967         struct extent_backref *node;
5968         struct data_backref *back;
5969
5970         while(cur != &rec->backrefs) {
5971                 node = to_extent_backref(cur);
5972                 cur = cur->next;
5973                 if (!node->is_data)
5974                         continue;
5975                 back = to_data_backref(node);
5976                 if (parent > 0) {
5977                         if (!node->full_backref)
5978                                 continue;
5979                         if (parent == back->parent)
5980                                 return back;
5981                 } else {
5982                         if (node->full_backref)
5983                                 continue;
5984                         if (back->root == root && back->owner == owner &&
5985                             back->offset == offset) {
5986                                 if (found_ref && node->found_ref &&
5987                                     (back->bytes != bytes ||
5988                                     back->disk_bytenr != disk_bytenr))
5989                                         continue;
5990                                 return back;
5991                         }
5992                 }
5993         }
5994         return NULL;
5995 }
5996
5997 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5998                                                 u64 parent, u64 root,
5999                                                 u64 owner, u64 offset,
6000                                                 u64 max_size)
6001 {
6002         struct data_backref *ref = malloc(sizeof(*ref));
6003
6004         if (!ref)
6005                 return NULL;
6006         memset(&ref->node, 0, sizeof(ref->node));
6007         ref->node.is_data = 1;
6008
6009         if (parent > 0) {
6010                 ref->parent = parent;
6011                 ref->owner = 0;
6012                 ref->offset = 0;
6013                 ref->node.full_backref = 1;
6014         } else {
6015                 ref->root = root;
6016                 ref->owner = owner;
6017                 ref->offset = offset;
6018                 ref->node.full_backref = 0;
6019         }
6020         ref->bytes = max_size;
6021         ref->found_ref = 0;
6022         ref->num_refs = 0;
6023         list_add_tail(&ref->node.list, &rec->backrefs);
6024         if (max_size > rec->max_size)
6025                 rec->max_size = max_size;
6026         return ref;
6027 }
6028
6029 /* Check if the type of extent matches with its chunk */
6030 static void check_extent_type(struct extent_record *rec)
6031 {
6032         struct btrfs_block_group_cache *bg_cache;
6033
6034         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6035         if (!bg_cache)
6036                 return;
6037
6038         /* data extent, check chunk directly*/
6039         if (!rec->metadata) {
6040                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6041                         rec->wrong_chunk_type = 1;
6042                 return;
6043         }
6044
6045         /* metadata extent, check the obvious case first */
6046         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6047                                  BTRFS_BLOCK_GROUP_METADATA))) {
6048                 rec->wrong_chunk_type = 1;
6049                 return;
6050         }
6051
6052         /*
6053          * Check SYSTEM extent, as it's also marked as metadata, we can only
6054          * make sure it's a SYSTEM extent by its backref
6055          */
6056         if (!list_empty(&rec->backrefs)) {
6057                 struct extent_backref *node;
6058                 struct tree_backref *tback;
6059                 u64 bg_type;
6060
6061                 node = to_extent_backref(rec->backrefs.next);
6062                 if (node->is_data) {
6063                         /* tree block shouldn't have data backref */
6064                         rec->wrong_chunk_type = 1;
6065                         return;
6066                 }
6067                 tback = container_of(node, struct tree_backref, node);
6068
6069                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6070                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6071                 else
6072                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6073                 if (!(bg_cache->flags & bg_type))
6074                         rec->wrong_chunk_type = 1;
6075         }
6076 }
6077
6078 /*
6079  * Allocate a new extent record, fill default values from @tmpl and insert int
6080  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6081  * the cache, otherwise it fails.
6082  */
6083 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6084                 struct extent_record *tmpl)
6085 {
6086         struct extent_record *rec;
6087         int ret = 0;
6088
6089         BUG_ON(tmpl->max_size == 0);
6090         rec = malloc(sizeof(*rec));
6091         if (!rec)
6092                 return -ENOMEM;
6093         rec->start = tmpl->start;
6094         rec->max_size = tmpl->max_size;
6095         rec->nr = max(tmpl->nr, tmpl->max_size);
6096         rec->found_rec = tmpl->found_rec;
6097         rec->content_checked = tmpl->content_checked;
6098         rec->owner_ref_checked = tmpl->owner_ref_checked;
6099         rec->num_duplicates = 0;
6100         rec->metadata = tmpl->metadata;
6101         rec->flag_block_full_backref = FLAG_UNSET;
6102         rec->bad_full_backref = 0;
6103         rec->crossing_stripes = 0;
6104         rec->wrong_chunk_type = 0;
6105         rec->is_root = tmpl->is_root;
6106         rec->refs = tmpl->refs;
6107         rec->extent_item_refs = tmpl->extent_item_refs;
6108         rec->parent_generation = tmpl->parent_generation;
6109         INIT_LIST_HEAD(&rec->backrefs);
6110         INIT_LIST_HEAD(&rec->dups);
6111         INIT_LIST_HEAD(&rec->list);
6112         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6113         rec->cache.start = tmpl->start;
6114         rec->cache.size = tmpl->nr;
6115         ret = insert_cache_extent(extent_cache, &rec->cache);
6116         if (ret) {
6117                 free(rec);
6118                 return ret;
6119         }
6120         bytes_used += rec->nr;
6121
6122         if (tmpl->metadata)
6123                 rec->crossing_stripes = check_crossing_stripes(global_info,
6124                                 rec->start, global_info->nodesize);
6125         check_extent_type(rec);
6126         return ret;
6127 }
6128
6129 /*
6130  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6131  * some are hints:
6132  * - refs              - if found, increase refs
6133  * - is_root           - if found, set
6134  * - content_checked   - if found, set
6135  * - owner_ref_checked - if found, set
6136  *
6137  * If not found, create a new one, initialize and insert.
6138  */
6139 static int add_extent_rec(struct cache_tree *extent_cache,
6140                 struct extent_record *tmpl)
6141 {
6142         struct extent_record *rec;
6143         struct cache_extent *cache;
6144         int ret = 0;
6145         int dup = 0;
6146
6147         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6148         if (cache) {
6149                 rec = container_of(cache, struct extent_record, cache);
6150                 if (tmpl->refs)
6151                         rec->refs++;
6152                 if (rec->nr == 1)
6153                         rec->nr = max(tmpl->nr, tmpl->max_size);
6154
6155                 /*
6156                  * We need to make sure to reset nr to whatever the extent
6157                  * record says was the real size, this way we can compare it to
6158                  * the backrefs.
6159                  */
6160                 if (tmpl->found_rec) {
6161                         if (tmpl->start != rec->start || rec->found_rec) {
6162                                 struct extent_record *tmp;
6163
6164                                 dup = 1;
6165                                 if (list_empty(&rec->list))
6166                                         list_add_tail(&rec->list,
6167                                                       &duplicate_extents);
6168
6169                                 /*
6170                                  * We have to do this song and dance in case we
6171                                  * find an extent record that falls inside of
6172                                  * our current extent record but does not have
6173                                  * the same objectid.
6174                                  */
6175                                 tmp = malloc(sizeof(*tmp));
6176                                 if (!tmp)
6177                                         return -ENOMEM;
6178                                 tmp->start = tmpl->start;
6179                                 tmp->max_size = tmpl->max_size;
6180                                 tmp->nr = tmpl->nr;
6181                                 tmp->found_rec = 1;
6182                                 tmp->metadata = tmpl->metadata;
6183                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6184                                 INIT_LIST_HEAD(&tmp->list);
6185                                 list_add_tail(&tmp->list, &rec->dups);
6186                                 rec->num_duplicates++;
6187                         } else {
6188                                 rec->nr = tmpl->nr;
6189                                 rec->found_rec = 1;
6190                         }
6191                 }
6192
6193                 if (tmpl->extent_item_refs && !dup) {
6194                         if (rec->extent_item_refs) {
6195                                 fprintf(stderr, "block %llu rec "
6196                                         "extent_item_refs %llu, passed %llu\n",
6197                                         (unsigned long long)tmpl->start,
6198                                         (unsigned long long)
6199                                                         rec->extent_item_refs,
6200                                         (unsigned long long)tmpl->extent_item_refs);
6201                         }
6202                         rec->extent_item_refs = tmpl->extent_item_refs;
6203                 }
6204                 if (tmpl->is_root)
6205                         rec->is_root = 1;
6206                 if (tmpl->content_checked)
6207                         rec->content_checked = 1;
6208                 if (tmpl->owner_ref_checked)
6209                         rec->owner_ref_checked = 1;
6210                 memcpy(&rec->parent_key, &tmpl->parent_key,
6211                                 sizeof(tmpl->parent_key));
6212                 if (tmpl->parent_generation)
6213                         rec->parent_generation = tmpl->parent_generation;
6214                 if (rec->max_size < tmpl->max_size)
6215                         rec->max_size = tmpl->max_size;
6216
6217                 /*
6218                  * A metadata extent can't cross stripe_len boundary, otherwise
6219                  * kernel scrub won't be able to handle it.
6220                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6221                  * it.
6222                  */
6223                 if (tmpl->metadata)
6224                         rec->crossing_stripes = check_crossing_stripes(
6225                                         global_info, rec->start,
6226                                         global_info->nodesize);
6227                 check_extent_type(rec);
6228                 maybe_free_extent_rec(extent_cache, rec);
6229                 return ret;
6230         }
6231
6232         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6233
6234         return ret;
6235 }
6236
6237 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6238                             u64 parent, u64 root, int found_ref)
6239 {
6240         struct extent_record *rec;
6241         struct tree_backref *back;
6242         struct cache_extent *cache;
6243         int ret;
6244
6245         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6246         if (!cache) {
6247                 struct extent_record tmpl;
6248
6249                 memset(&tmpl, 0, sizeof(tmpl));
6250                 tmpl.start = bytenr;
6251                 tmpl.nr = 1;
6252                 tmpl.metadata = 1;
6253                 tmpl.max_size = 1;
6254
6255                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6256                 if (ret)
6257                         return ret;
6258
6259                 /* really a bug in cache_extent implement now */
6260                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6261                 if (!cache)
6262                         return -ENOENT;
6263         }
6264
6265         rec = container_of(cache, struct extent_record, cache);
6266         if (rec->start != bytenr) {
6267                 /*
6268                  * Several cause, from unaligned bytenr to over lapping extents
6269                  */
6270                 return -EEXIST;
6271         }
6272
6273         back = find_tree_backref(rec, parent, root);
6274         if (!back) {
6275                 back = alloc_tree_backref(rec, parent, root);
6276                 if (!back)
6277                         return -ENOMEM;
6278         }
6279
6280         if (found_ref) {
6281                 if (back->node.found_ref) {
6282                         fprintf(stderr, "Extent back ref already exists "
6283                                 "for %llu parent %llu root %llu \n",
6284                                 (unsigned long long)bytenr,
6285                                 (unsigned long long)parent,
6286                                 (unsigned long long)root);
6287                 }
6288                 back->node.found_ref = 1;
6289         } else {
6290                 if (back->node.found_extent_tree) {
6291                         fprintf(stderr, "Extent back ref already exists "
6292                                 "for %llu parent %llu root %llu \n",
6293                                 (unsigned long long)bytenr,
6294                                 (unsigned long long)parent,
6295                                 (unsigned long long)root);
6296                 }
6297                 back->node.found_extent_tree = 1;
6298         }
6299         check_extent_type(rec);
6300         maybe_free_extent_rec(extent_cache, rec);
6301         return 0;
6302 }
6303
6304 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6305                             u64 parent, u64 root, u64 owner, u64 offset,
6306                             u32 num_refs, int found_ref, u64 max_size)
6307 {
6308         struct extent_record *rec;
6309         struct data_backref *back;
6310         struct cache_extent *cache;
6311         int ret;
6312
6313         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6314         if (!cache) {
6315                 struct extent_record tmpl;
6316
6317                 memset(&tmpl, 0, sizeof(tmpl));
6318                 tmpl.start = bytenr;
6319                 tmpl.nr = 1;
6320                 tmpl.max_size = max_size;
6321
6322                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6323                 if (ret)
6324                         return ret;
6325
6326                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6327                 if (!cache)
6328                         abort();
6329         }
6330
6331         rec = container_of(cache, struct extent_record, cache);
6332         if (rec->max_size < max_size)
6333                 rec->max_size = max_size;
6334
6335         /*
6336          * If found_ref is set then max_size is the real size and must match the
6337          * existing refs.  So if we have already found a ref then we need to
6338          * make sure that this ref matches the existing one, otherwise we need
6339          * to add a new backref so we can notice that the backrefs don't match
6340          * and we need to figure out who is telling the truth.  This is to
6341          * account for that awful fsync bug I introduced where we'd end up with
6342          * a btrfs_file_extent_item that would have its length include multiple
6343          * prealloc extents or point inside of a prealloc extent.
6344          */
6345         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6346                                  bytenr, max_size);
6347         if (!back) {
6348                 back = alloc_data_backref(rec, parent, root, owner, offset,
6349                                           max_size);
6350                 BUG_ON(!back);
6351         }
6352
6353         if (found_ref) {
6354                 BUG_ON(num_refs != 1);
6355                 if (back->node.found_ref)
6356                         BUG_ON(back->bytes != max_size);
6357                 back->node.found_ref = 1;
6358                 back->found_ref += 1;
6359                 back->bytes = max_size;
6360                 back->disk_bytenr = bytenr;
6361                 rec->refs += 1;
6362                 rec->content_checked = 1;
6363                 rec->owner_ref_checked = 1;
6364         } else {
6365                 if (back->node.found_extent_tree) {
6366                         fprintf(stderr, "Extent back ref already exists "
6367                                 "for %llu parent %llu root %llu "
6368                                 "owner %llu offset %llu num_refs %lu\n",
6369                                 (unsigned long long)bytenr,
6370                                 (unsigned long long)parent,
6371                                 (unsigned long long)root,
6372                                 (unsigned long long)owner,
6373                                 (unsigned long long)offset,
6374                                 (unsigned long)num_refs);
6375                 }
6376                 back->num_refs = num_refs;
6377                 back->node.found_extent_tree = 1;
6378         }
6379         maybe_free_extent_rec(extent_cache, rec);
6380         return 0;
6381 }
6382
6383 static int add_pending(struct cache_tree *pending,
6384                        struct cache_tree *seen, u64 bytenr, u32 size)
6385 {
6386         int ret;
6387         ret = add_cache_extent(seen, bytenr, size);
6388         if (ret)
6389                 return ret;
6390         add_cache_extent(pending, bytenr, size);
6391         return 0;
6392 }
6393
6394 static int pick_next_pending(struct cache_tree *pending,
6395                         struct cache_tree *reada,
6396                         struct cache_tree *nodes,
6397                         u64 last, struct block_info *bits, int bits_nr,
6398                         int *reada_bits)
6399 {
6400         unsigned long node_start = last;
6401         struct cache_extent *cache;
6402         int ret;
6403
6404         cache = search_cache_extent(reada, 0);
6405         if (cache) {
6406                 bits[0].start = cache->start;
6407                 bits[0].size = cache->size;
6408                 *reada_bits = 1;
6409                 return 1;
6410         }
6411         *reada_bits = 0;
6412         if (node_start > 32768)
6413                 node_start -= 32768;
6414
6415         cache = search_cache_extent(nodes, node_start);
6416         if (!cache)
6417                 cache = search_cache_extent(nodes, 0);
6418
6419         if (!cache) {
6420                  cache = search_cache_extent(pending, 0);
6421                  if (!cache)
6422                          return 0;
6423                  ret = 0;
6424                  do {
6425                          bits[ret].start = cache->start;
6426                          bits[ret].size = cache->size;
6427                          cache = next_cache_extent(cache);
6428                          ret++;
6429                  } while (cache && ret < bits_nr);
6430                  return ret;
6431         }
6432
6433         ret = 0;
6434         do {
6435                 bits[ret].start = cache->start;
6436                 bits[ret].size = cache->size;
6437                 cache = next_cache_extent(cache);
6438                 ret++;
6439         } while (cache && ret < bits_nr);
6440
6441         if (bits_nr - ret > 8) {
6442                 u64 lookup = bits[0].start + bits[0].size;
6443                 struct cache_extent *next;
6444                 next = search_cache_extent(pending, lookup);
6445                 while(next) {
6446                         if (next->start - lookup > 32768)
6447                                 break;
6448                         bits[ret].start = next->start;
6449                         bits[ret].size = next->size;
6450                         lookup = next->start + next->size;
6451                         ret++;
6452                         if (ret == bits_nr)
6453                                 break;
6454                         next = next_cache_extent(next);
6455                         if (!next)
6456                                 break;
6457                 }
6458         }
6459         return ret;
6460 }
6461
6462 static void free_chunk_record(struct cache_extent *cache)
6463 {
6464         struct chunk_record *rec;
6465
6466         rec = container_of(cache, struct chunk_record, cache);
6467         list_del_init(&rec->list);
6468         list_del_init(&rec->dextents);
6469         free(rec);
6470 }
6471
/*
 * Tear down @chunk_cache by passing free_chunk_record() as the per-extent
 * callback to cache_tree_free_extents().
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
6476
6477 static void free_device_record(struct rb_node *node)
6478 {
6479         struct device_record *rec;
6480
6481         rec = container_of(node, struct device_record, node);
6482         free(rec);
6483 }
6484
6485 FREE_RB_BASED_TREE(device_cache, free_device_record);
6486
6487 int insert_block_group_record(struct block_group_tree *tree,
6488                               struct block_group_record *bg_rec)
6489 {
6490         int ret;
6491
6492         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6493         if (ret)
6494                 return ret;
6495
6496         list_add_tail(&bg_rec->list, &tree->block_groups);
6497         return 0;
6498 }
6499
6500 static void free_block_group_record(struct cache_extent *cache)
6501 {
6502         struct block_group_record *rec;
6503
6504         rec = container_of(cache, struct block_group_record, cache);
6505         list_del_init(&rec->list);
6506         free(rec);
6507 }
6508
/*
 * Tear down the extent cache of @tree by passing free_block_group_record() as
 * the per-extent callback to cache_tree_free_extents().
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
6513
6514 int insert_device_extent_record(struct device_extent_tree *tree,
6515                                 struct device_extent_record *de_rec)
6516 {
6517         int ret;
6518
6519         /*
6520          * Device extent is a bit different from the other extents, because
6521          * the extents which belong to the different devices may have the
6522          * same start and size, so we need use the special extent cache
6523          * search/insert functions.
6524          */
6525         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6526         if (ret)
6527                 return ret;
6528
6529         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6530         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6531         return 0;
6532 }
6533
6534 static void free_device_extent_record(struct cache_extent *cache)
6535 {
6536         struct device_extent_record *rec;
6537
6538         rec = container_of(cache, struct device_extent_record, cache);
6539         if (!list_empty(&rec->chunk_list))
6540                 list_del_init(&rec->chunk_list);
6541         if (!list_empty(&rec->device_list))
6542                 list_del_init(&rec->device_list);
6543         free(rec);
6544 }
6545
/*
 * Tear down the extent cache of @tree by passing free_device_extent_record()
 * as the per-extent callback to cache_tree_free_extents().
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
6550
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref on the extent
 * at key.objectid, with key.offset passed as the parent.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the v0 ref count.
 * Returns whatever the chosen add_*_backref() helper returns.
 *
 * NOTE(review): the data path passes max_size 0; add_extent_rec_nolookup()
 * has a BUG_ON for max_size == 0, which would trigger if no record exists
 * for the extent yet -- confirm whether that can happen.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6571
6572 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6573                                             struct btrfs_key *key,
6574                                             int slot)
6575 {
6576         struct btrfs_chunk *ptr;
6577         struct chunk_record *rec;
6578         int num_stripes, i;
6579
6580         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6581         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6582
6583         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6584         if (!rec) {
6585                 fprintf(stderr, "memory allocation failed\n");
6586                 exit(-1);
6587         }
6588
6589         INIT_LIST_HEAD(&rec->list);
6590         INIT_LIST_HEAD(&rec->dextents);
6591         rec->bg_rec = NULL;
6592
6593         rec->cache.start = key->offset;
6594         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6595
6596         rec->generation = btrfs_header_generation(leaf);
6597
6598         rec->objectid = key->objectid;
6599         rec->type = key->type;
6600         rec->offset = key->offset;
6601
6602         rec->length = rec->cache.size;
6603         rec->owner = btrfs_chunk_owner(leaf, ptr);
6604         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6605         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6606         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6607         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6608         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6609         rec->num_stripes = num_stripes;
6610         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6611
6612         for (i = 0; i < rec->num_stripes; ++i) {
6613                 rec->stripes[i].devid =
6614                         btrfs_stripe_devid_nr(leaf, ptr, i);
6615                 rec->stripes[i].offset =
6616                         btrfs_stripe_offset_nr(leaf, ptr, i);
6617                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6618                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6619                                 BTRFS_UUID_SIZE);
6620         }
6621
6622         return rec;
6623 }
6624
6625 static int process_chunk_item(struct cache_tree *chunk_cache,
6626                               struct btrfs_key *key, struct extent_buffer *eb,
6627                               int slot)
6628 {
6629         struct chunk_record *rec;
6630         struct btrfs_chunk *chunk;
6631         int ret = 0;
6632
6633         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6634         /*
6635          * Do extra check for this chunk item,
6636          *
6637          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6638          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6639          * and owner<->key_type check.
6640          */
6641         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6642                                       key->offset);
6643         if (ret < 0) {
6644                 error("chunk(%llu, %llu) is not valid, ignore it",
6645                       key->offset, btrfs_chunk_length(eb, chunk));
6646                 return 0;
6647         }
6648         rec = btrfs_new_chunk_record(eb, key, slot);
6649         ret = insert_cache_extent(chunk_cache, &rec->cache);
6650         if (ret) {
6651                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6652                         rec->offset, rec->length);
6653                 free(rec);
6654         }
6655
6656         return ret;
6657 }
6658
6659 static int process_device_item(struct rb_root *dev_cache,
6660                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6661 {
6662         struct btrfs_dev_item *ptr;
6663         struct device_record *rec;
6664         int ret = 0;
6665
6666         ptr = btrfs_item_ptr(eb,
6667                 slot, struct btrfs_dev_item);
6668
6669         rec = malloc(sizeof(*rec));
6670         if (!rec) {
6671                 fprintf(stderr, "memory allocation failed\n");
6672                 return -ENOMEM;
6673         }
6674
6675         rec->devid = key->offset;
6676         rec->generation = btrfs_header_generation(eb);
6677
6678         rec->objectid = key->objectid;
6679         rec->type = key->type;
6680         rec->offset = key->offset;
6681
6682         rec->devid = btrfs_device_id(eb, ptr);
6683         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6684         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6685
6686         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6687         if (ret) {
6688                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6689                 free(rec);
6690         }
6691
6692         return ret;
6693 }
6694
6695 struct block_group_record *
6696 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6697                              int slot)
6698 {
6699         struct btrfs_block_group_item *ptr;
6700         struct block_group_record *rec;
6701
6702         rec = calloc(1, sizeof(*rec));
6703         if (!rec) {
6704                 fprintf(stderr, "memory allocation failed\n");
6705                 exit(-1);
6706         }
6707
6708         rec->cache.start = key->objectid;
6709         rec->cache.size = key->offset;
6710
6711         rec->generation = btrfs_header_generation(leaf);
6712
6713         rec->objectid = key->objectid;
6714         rec->type = key->type;
6715         rec->offset = key->offset;
6716
6717         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6718         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6719
6720         INIT_LIST_HEAD(&rec->list);
6721
6722         return rec;
6723 }
6724
6725 static int process_block_group_item(struct block_group_tree *block_group_cache,
6726                                     struct btrfs_key *key,
6727                                     struct extent_buffer *eb, int slot)
6728 {
6729         struct block_group_record *rec;
6730         int ret = 0;
6731
6732         rec = btrfs_new_block_group_record(eb, key, slot);
6733         ret = insert_block_group_record(block_group_cache, rec);
6734         if (ret) {
6735                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6736                         rec->objectid, rec->offset);
6737                 free(rec);
6738         }
6739
6740         return ret;
6741 }
6742
6743 struct device_extent_record *
6744 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6745                                struct btrfs_key *key, int slot)
6746 {
6747         struct device_extent_record *rec;
6748         struct btrfs_dev_extent *ptr;
6749
6750         rec = calloc(1, sizeof(*rec));
6751         if (!rec) {
6752                 fprintf(stderr, "memory allocation failed\n");
6753                 exit(-1);
6754         }
6755
6756         rec->cache.objectid = key->objectid;
6757         rec->cache.start = key->offset;
6758
6759         rec->generation = btrfs_header_generation(leaf);
6760
6761         rec->objectid = key->objectid;
6762         rec->type = key->type;
6763         rec->offset = key->offset;
6764
6765         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6766         rec->chunk_objecteid =
6767                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6768         rec->chunk_offset =
6769                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6770         rec->length = btrfs_dev_extent_length(leaf, ptr);
6771         rec->cache.size = rec->length;
6772
6773         INIT_LIST_HEAD(&rec->chunk_list);
6774         INIT_LIST_HEAD(&rec->device_list);
6775
6776         return rec;
6777 }
6778
6779 static int
6780 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6781                            struct btrfs_key *key, struct extent_buffer *eb,
6782                            int slot)
6783 {
6784         struct device_extent_record *rec;
6785         int ret;
6786
6787         rec = btrfs_new_device_extent_record(eb, key, slot);
6788         ret = insert_device_extent_record(dev_extent_cache, rec);
6789         if (ret) {
6790                 fprintf(stderr,
6791                         "Device extent[%llu, %llu, %llu] existed.\n",
6792                         rec->objectid, rec->offset, rec->length);
6793                 free(rec);
6794         }
6795
6796         return ret;
6797 }
6798
6799 static int process_extent_item(struct btrfs_root *root,
6800                                struct cache_tree *extent_cache,
6801                                struct extent_buffer *eb, int slot)
6802 {
6803         struct btrfs_extent_item *ei;
6804         struct btrfs_extent_inline_ref *iref;
6805         struct btrfs_extent_data_ref *dref;
6806         struct btrfs_shared_data_ref *sref;
6807         struct btrfs_key key;
6808         struct extent_record tmpl;
6809         unsigned long end;
6810         unsigned long ptr;
6811         int ret;
6812         int type;
6813         u32 item_size = btrfs_item_size_nr(eb, slot);
6814         u64 refs = 0;
6815         u64 offset;
6816         u64 num_bytes;
6817         int metadata = 0;
6818
6819         btrfs_item_key_to_cpu(eb, &key, slot);
6820
6821         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6822                 metadata = 1;
6823                 num_bytes = root->fs_info->nodesize;
6824         } else {
6825                 num_bytes = key.offset;
6826         }
6827
6828         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6829                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6830                       key.objectid, root->fs_info->sectorsize);
6831                 return -EIO;
6832         }
6833         if (item_size < sizeof(*ei)) {
6834 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6835                 struct btrfs_extent_item_v0 *ei0;
6836                 BUG_ON(item_size != sizeof(*ei0));
6837                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6838                 refs = btrfs_extent_refs_v0(eb, ei0);
6839 #else
6840                 BUG();
6841 #endif
6842                 memset(&tmpl, 0, sizeof(tmpl));
6843                 tmpl.start = key.objectid;
6844                 tmpl.nr = num_bytes;
6845                 tmpl.extent_item_refs = refs;
6846                 tmpl.metadata = metadata;
6847                 tmpl.found_rec = 1;
6848                 tmpl.max_size = num_bytes;
6849
6850                 return add_extent_rec(extent_cache, &tmpl);
6851         }
6852
6853         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6854         refs = btrfs_extent_refs(eb, ei);
6855         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6856                 metadata = 1;
6857         else
6858                 metadata = 0;
6859         if (metadata && num_bytes != root->fs_info->nodesize) {
6860                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6861                       num_bytes, root->fs_info->nodesize);
6862                 return -EIO;
6863         }
6864         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6865                 error("ignore invalid data extent, length %llu is not aligned to %u",
6866                       num_bytes, root->fs_info->sectorsize);
6867                 return -EIO;
6868         }
6869
6870         memset(&tmpl, 0, sizeof(tmpl));
6871         tmpl.start = key.objectid;
6872         tmpl.nr = num_bytes;
6873         tmpl.extent_item_refs = refs;
6874         tmpl.metadata = metadata;
6875         tmpl.found_rec = 1;
6876         tmpl.max_size = num_bytes;
6877         add_extent_rec(extent_cache, &tmpl);
6878
6879         ptr = (unsigned long)(ei + 1);
6880         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6881             key.type == BTRFS_EXTENT_ITEM_KEY)
6882                 ptr += sizeof(struct btrfs_tree_block_info);
6883
6884         end = (unsigned long)ei + item_size;
6885         while (ptr < end) {
6886                 iref = (struct btrfs_extent_inline_ref *)ptr;
6887                 type = btrfs_extent_inline_ref_type(eb, iref);
6888                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6889                 switch (type) {
6890                 case BTRFS_TREE_BLOCK_REF_KEY:
6891                         ret = add_tree_backref(extent_cache, key.objectid,
6892                                         0, offset, 0);
6893                         if (ret < 0)
6894                                 error(
6895                         "add_tree_backref failed (extent items tree block): %s",
6896                                       strerror(-ret));
6897                         break;
6898                 case BTRFS_SHARED_BLOCK_REF_KEY:
6899                         ret = add_tree_backref(extent_cache, key.objectid,
6900                                         offset, 0, 0);
6901                         if (ret < 0)
6902                                 error(
6903                         "add_tree_backref failed (extent items shared block): %s",
6904                                       strerror(-ret));
6905                         break;
6906                 case BTRFS_EXTENT_DATA_REF_KEY:
6907                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6908                         add_data_backref(extent_cache, key.objectid, 0,
6909                                         btrfs_extent_data_ref_root(eb, dref),
6910                                         btrfs_extent_data_ref_objectid(eb,
6911                                                                        dref),
6912                                         btrfs_extent_data_ref_offset(eb, dref),
6913                                         btrfs_extent_data_ref_count(eb, dref),
6914                                         0, num_bytes);
6915                         break;
6916                 case BTRFS_SHARED_DATA_REF_KEY:
6917                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6918                         add_data_backref(extent_cache, key.objectid, offset,
6919                                         0, 0, 0,
6920                                         btrfs_shared_data_ref_count(eb, sref),
6921                                         0, num_bytes);
6922                         break;
6923                 default:
6924                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6925                                 key.objectid, key.type, num_bytes);
6926                         goto out;
6927                 }
6928                 ptr += btrfs_extent_inline_ref_size(type);
6929         }
6930         WARN_ON(ptr > end);
6931 out:
6932         return 0;
6933 }
6934
6935 static int check_cache_range(struct btrfs_root *root,
6936                              struct btrfs_block_group_cache *cache,
6937                              u64 offset, u64 bytes)
6938 {
6939         struct btrfs_free_space *entry;
6940         u64 *logical;
6941         u64 bytenr;
6942         int stripe_len;
6943         int i, nr, ret;
6944
6945         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6946                 bytenr = btrfs_sb_offset(i);
6947                 ret = btrfs_rmap_block(root->fs_info,
6948                                        cache->key.objectid, bytenr, 0,
6949                                        &logical, &nr, &stripe_len);
6950                 if (ret)
6951                         return ret;
6952
6953                 while (nr--) {
6954                         if (logical[nr] + stripe_len <= offset)
6955                                 continue;
6956                         if (offset + bytes <= logical[nr])
6957                                 continue;
6958                         if (logical[nr] == offset) {
6959                                 if (stripe_len >= bytes) {
6960                                         free(logical);
6961                                         return 0;
6962                                 }
6963                                 bytes -= stripe_len;
6964                                 offset += stripe_len;
6965                         } else if (logical[nr] < offset) {
6966                                 if (logical[nr] + stripe_len >=
6967                                     offset + bytes) {
6968                                         free(logical);
6969                                         return 0;
6970                                 }
6971                                 bytes = (offset + bytes) -
6972                                         (logical[nr] + stripe_len);
6973                                 offset = logical[nr] + stripe_len;
6974                         } else {
6975                                 /*
6976                                  * Could be tricky, the super may land in the
6977                                  * middle of the area we're checking.  First
6978                                  * check the easiest case, it's at the end.
6979                                  */
6980                                 if (logical[nr] + stripe_len >=
6981                                     bytes + offset) {
6982                                         bytes = logical[nr] - offset;
6983                                         continue;
6984                                 }
6985
6986                                 /* Check the left side */
6987                                 ret = check_cache_range(root, cache,
6988                                                         offset,
6989                                                         logical[nr] - offset);
6990                                 if (ret) {
6991                                         free(logical);
6992                                         return ret;
6993                                 }
6994
6995                                 /* Now we continue with the right side */
6996                                 bytes = (offset + bytes) -
6997                                         (logical[nr] + stripe_len);
6998                                 offset = logical[nr] + stripe_len;
6999                         }
7000                 }
7001
7002                 free(logical);
7003         }
7004
7005         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7006         if (!entry) {
7007                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7008                         offset, offset+bytes);
7009                 return -EINVAL;
7010         }
7011
7012         if (entry->offset != offset) {
7013                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7014                         entry->offset);
7015                 return -EINVAL;
7016         }
7017
7018         if (entry->bytes != bytes) {
7019                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7020                         bytes, entry->bytes, offset);
7021                 return -EINVAL;
7022         }
7023
7024         unlink_free_space(cache->free_space_ctl, entry);
7025         free(entry);
7026         return 0;
7027 }
7028
7029 static int verify_space_cache(struct btrfs_root *root,
7030                               struct btrfs_block_group_cache *cache)
7031 {
7032         struct btrfs_path path;
7033         struct extent_buffer *leaf;
7034         struct btrfs_key key;
7035         u64 last;
7036         int ret = 0;
7037
7038         root = root->fs_info->extent_root;
7039
7040         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7041
7042         btrfs_init_path(&path);
7043         key.objectid = last;
7044         key.offset = 0;
7045         key.type = BTRFS_EXTENT_ITEM_KEY;
7046         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7047         if (ret < 0)
7048                 goto out;
7049         ret = 0;
7050         while (1) {
7051                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7052                         ret = btrfs_next_leaf(root, &path);
7053                         if (ret < 0)
7054                                 goto out;
7055                         if (ret > 0) {
7056                                 ret = 0;
7057                                 break;
7058                         }
7059                 }
7060                 leaf = path.nodes[0];
7061                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7062                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7063                         break;
7064                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7065                     key.type != BTRFS_METADATA_ITEM_KEY) {
7066                         path.slots[0]++;
7067                         continue;
7068                 }
7069
7070                 if (last == key.objectid) {
7071                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7072                                 last = key.objectid + key.offset;
7073                         else
7074                                 last = key.objectid + root->fs_info->nodesize;
7075                         path.slots[0]++;
7076                         continue;
7077                 }
7078
7079                 ret = check_cache_range(root, cache, last,
7080                                         key.objectid - last);
7081                 if (ret)
7082                         break;
7083                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7084                         last = key.objectid + key.offset;
7085                 else
7086                         last = key.objectid + root->fs_info->nodesize;
7087                 path.slots[0]++;
7088         }
7089
7090         if (last < cache->key.objectid + cache->key.offset)
7091                 ret = check_cache_range(root, cache, last,
7092                                         cache->key.objectid +
7093                                         cache->key.offset - last);
7094
7095 out:
7096         btrfs_release_path(&path);
7097
7098         if (!ret &&
7099             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7100                 fprintf(stderr, "There are still entries left in the space "
7101                         "cache\n");
7102                 ret = -EINVAL;
7103         }
7104
7105         return ret;
7106 }
7107
7108 static int check_space_cache(struct btrfs_root *root)
7109 {
7110         struct btrfs_block_group_cache *cache;
7111         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7112         int ret;
7113         int error = 0;
7114
7115         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7116             btrfs_super_generation(root->fs_info->super_copy) !=
7117             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7118                 printf("cache and super generation don't match, space cache "
7119                        "will be invalidated\n");
7120                 return 0;
7121         }
7122
7123         if (ctx.progress_enabled) {
7124                 ctx.tp = TASK_FREE_SPACE;
7125                 task_start(ctx.info);
7126         }
7127
7128         while (1) {
7129                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7130                 if (!cache)
7131                         break;
7132
7133                 start = cache->key.objectid + cache->key.offset;
7134                 if (!cache->free_space_ctl) {
7135                         if (btrfs_init_free_space_ctl(cache,
7136                                                 root->fs_info->sectorsize)) {
7137                                 ret = -ENOMEM;
7138                                 break;
7139                         }
7140                 } else {
7141                         btrfs_remove_free_space_cache(cache);
7142                 }
7143
7144                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7145                         ret = exclude_super_stripes(root, cache);
7146                         if (ret) {
7147                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7148                                         strerror(-ret));
7149                                 error++;
7150                                 continue;
7151                         }
7152                         ret = load_free_space_tree(root->fs_info, cache);
7153                         free_excluded_extents(root, cache);
7154                         if (ret < 0) {
7155                                 fprintf(stderr, "could not load free space tree: %s\n",
7156                                         strerror(-ret));
7157                                 error++;
7158                                 continue;
7159                         }
7160                         error += ret;
7161                 } else {
7162                         ret = load_free_space_cache(root->fs_info, cache);
7163                         if (!ret)
7164                                 continue;
7165                 }
7166
7167                 ret = verify_space_cache(root, cache);
7168                 if (ret) {
7169                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7170                                 cache->key.objectid);
7171                         error++;
7172                 }
7173         }
7174
7175         task_stop(ctx.info);
7176
7177         return error ? -EINVAL : 0;
7178 }
7179
7180 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7181                         u64 num_bytes, unsigned long leaf_offset,
7182                         struct extent_buffer *eb) {
7183
7184         struct btrfs_fs_info *fs_info = root->fs_info;
7185         u64 offset = 0;
7186         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7187         char *data;
7188         unsigned long csum_offset;
7189         u32 csum;
7190         u32 csum_expected;
7191         u64 read_len;
7192         u64 data_checked = 0;
7193         u64 tmp;
7194         int ret = 0;
7195         int mirror;
7196         int num_copies;
7197
7198         if (num_bytes % fs_info->sectorsize)
7199                 return -EINVAL;
7200
7201         data = malloc(num_bytes);
7202         if (!data)
7203                 return -ENOMEM;
7204
7205         while (offset < num_bytes) {
7206                 mirror = 0;
7207 again:
7208                 read_len = num_bytes - offset;
7209                 /* read as much space once a time */
7210                 ret = read_extent_data(fs_info, data + offset,
7211                                 bytenr + offset, &read_len, mirror);
7212                 if (ret)
7213                         goto out;
7214                 data_checked = 0;
7215                 /* verify every 4k data's checksum */
7216                 while (data_checked < read_len) {
7217                         csum = ~(u32)0;
7218                         tmp = offset + data_checked;
7219
7220                         csum = btrfs_csum_data((char *)data + tmp,
7221                                                csum, fs_info->sectorsize);
7222                         btrfs_csum_final(csum, (u8 *)&csum);
7223
7224                         csum_offset = leaf_offset +
7225                                  tmp / fs_info->sectorsize * csum_size;
7226                         read_extent_buffer(eb, (char *)&csum_expected,
7227                                            csum_offset, csum_size);
7228                         /* try another mirror */
7229                         if (csum != csum_expected) {
7230                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7231                                                 mirror, bytenr + tmp,
7232                                                 csum, csum_expected);
7233                                 num_copies = btrfs_num_copies(root->fs_info,
7234                                                 bytenr, num_bytes);
7235                                 if (mirror < num_copies - 1) {
7236                                         mirror += 1;
7237                                         goto again;
7238                                 }
7239                         }
7240                         data_checked += fs_info->sectorsize;
7241                 }
7242                 offset += read_len;
7243         }
7244 out:
7245         free(data);
7246         return ret;
7247 }
7248
7249 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7250                                u64 num_bytes)
7251 {
7252         struct btrfs_path path;
7253         struct extent_buffer *leaf;
7254         struct btrfs_key key;
7255         int ret;
7256
7257         btrfs_init_path(&path);
7258         key.objectid = bytenr;
7259         key.type = BTRFS_EXTENT_ITEM_KEY;
7260         key.offset = (u64)-1;
7261
7262 again:
7263         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7264                                 0, 0);
7265         if (ret < 0) {
7266                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7267                 btrfs_release_path(&path);
7268                 return ret;
7269         } else if (ret) {
7270                 if (path.slots[0] > 0) {
7271                         path.slots[0]--;
7272                 } else {
7273                         ret = btrfs_prev_leaf(root, &path);
7274                         if (ret < 0) {
7275                                 goto out;
7276                         } else if (ret > 0) {
7277                                 ret = 0;
7278                                 goto out;
7279                         }
7280                 }
7281         }
7282
7283         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7284
7285         /*
7286          * Block group items come before extent items if they have the same
7287          * bytenr, so walk back one more just in case.  Dear future traveller,
7288          * first congrats on mastering time travel.  Now if it's not too much
7289          * trouble could you go back to 2006 and tell Chris to make the
7290          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7291          * EXTENT_ITEM_KEY please?
7292          */
7293         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7294                 if (path.slots[0] > 0) {
7295                         path.slots[0]--;
7296                 } else {
7297                         ret = btrfs_prev_leaf(root, &path);
7298                         if (ret < 0) {
7299                                 goto out;
7300                         } else if (ret > 0) {
7301                                 ret = 0;
7302                                 goto out;
7303                         }
7304                 }
7305                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7306         }
7307
7308         while (num_bytes) {
7309                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7310                         ret = btrfs_next_leaf(root, &path);
7311                         if (ret < 0) {
7312                                 fprintf(stderr, "Error going to next leaf "
7313                                         "%d\n", ret);
7314                                 btrfs_release_path(&path);
7315                                 return ret;
7316                         } else if (ret) {
7317                                 break;
7318                         }
7319                 }
7320                 leaf = path.nodes[0];
7321                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7322                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7323                         path.slots[0]++;
7324                         continue;
7325                 }
7326                 if (key.objectid + key.offset < bytenr) {
7327                         path.slots[0]++;
7328                         continue;
7329                 }
7330                 if (key.objectid > bytenr + num_bytes)
7331                         break;
7332
7333                 if (key.objectid == bytenr) {
7334                         if (key.offset >= num_bytes) {
7335                                 num_bytes = 0;
7336                                 break;
7337                         }
7338                         num_bytes -= key.offset;
7339                         bytenr += key.offset;
7340                 } else if (key.objectid < bytenr) {
7341                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7342                                 num_bytes = 0;
7343                                 break;
7344                         }
7345                         num_bytes = (bytenr + num_bytes) -
7346                                 (key.objectid + key.offset);
7347                         bytenr = key.objectid + key.offset;
7348                 } else {
7349                         if (key.objectid + key.offset < bytenr + num_bytes) {
7350                                 u64 new_start = key.objectid + key.offset;
7351                                 u64 new_bytes = bytenr + num_bytes - new_start;
7352
7353                                 /*
7354                                  * Weird case, the extent is in the middle of
7355                                  * our range, we'll have to search one side
7356                                  * and then the other.  Not sure if this happens
7357                                  * in real life, but no harm in coding it up
7358                                  * anyway just in case.
7359                                  */
7360                                 btrfs_release_path(&path);
7361                                 ret = check_extent_exists(root, new_start,
7362                                                           new_bytes);
7363                                 if (ret) {
7364                                         fprintf(stderr, "Right section didn't "
7365                                                 "have a record\n");
7366                                         break;
7367                                 }
7368                                 num_bytes = key.objectid - bytenr;
7369                                 goto again;
7370                         }
7371                         num_bytes = key.objectid - bytenr;
7372                 }
7373                 path.slots[0]++;
7374         }
7375         ret = 0;
7376
7377 out:
7378         if (num_bytes && !ret) {
7379                 fprintf(stderr, "There are no extents for csum range "
7380                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7381                 ret = 1;
7382         }
7383
7384         btrfs_release_path(&path);
7385         return ret;
7386 }
7387
7388 static int check_csums(struct btrfs_root *root)
7389 {
7390         struct btrfs_path path;
7391         struct extent_buffer *leaf;
7392         struct btrfs_key key;
7393         u64 offset = 0, num_bytes = 0;
7394         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7395         int errors = 0;
7396         int ret;
7397         u64 data_len;
7398         unsigned long leaf_offset;
7399
7400         root = root->fs_info->csum_root;
7401         if (!extent_buffer_uptodate(root->node)) {
7402                 fprintf(stderr, "No valid csum tree found\n");
7403                 return -ENOENT;
7404         }
7405
7406         btrfs_init_path(&path);
7407         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7408         key.type = BTRFS_EXTENT_CSUM_KEY;
7409         key.offset = 0;
7410         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7411         if (ret < 0) {
7412                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7413                 btrfs_release_path(&path);
7414                 return ret;
7415         }
7416
7417         if (ret > 0 && path.slots[0])
7418                 path.slots[0]--;
7419         ret = 0;
7420
7421         while (1) {
7422                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7423                         ret = btrfs_next_leaf(root, &path);
7424                         if (ret < 0) {
7425                                 fprintf(stderr, "Error going to next leaf "
7426                                         "%d\n", ret);
7427                                 break;
7428                         }
7429                         if (ret)
7430                                 break;
7431                 }
7432                 leaf = path.nodes[0];
7433
7434                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7435                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7436                         path.slots[0]++;
7437                         continue;
7438                 }
7439
7440                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7441                               csum_size) * root->fs_info->sectorsize;
7442                 if (!check_data_csum)
7443                         goto skip_csum_check;
7444                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7445                 ret = check_extent_csums(root, key.offset, data_len,
7446                                          leaf_offset, leaf);
7447                 if (ret)
7448                         break;
7449 skip_csum_check:
7450                 if (!num_bytes) {
7451                         offset = key.offset;
7452                 } else if (key.offset != offset + num_bytes) {
7453                         ret = check_extent_exists(root, offset, num_bytes);
7454                         if (ret) {
7455                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7456                                         "there is no extent record\n",
7457                                         offset, offset+num_bytes);
7458                                 errors++;
7459                         }
7460                         offset = key.offset;
7461                         num_bytes = 0;
7462                 }
7463                 num_bytes += data_len;
7464                 path.slots[0]++;
7465         }
7466
7467         btrfs_release_path(&path);
7468         return errors;
7469 }
7470
7471 static int is_dropped_key(struct btrfs_key *key,
7472                           struct btrfs_key *drop_key) {
7473         if (key->objectid < drop_key->objectid)
7474                 return 1;
7475         else if (key->objectid == drop_key->objectid) {
7476                 if (key->type < drop_key->type)
7477                         return 1;
7478                 else if (key->type == drop_key->type) {
7479                         if (key->offset < drop_key->offset)
7480                                 return 1;
7481                 }
7482         }
7483         return 0;
7484 }
7485
7486 /*
7487  * Here are the rules for FULL_BACKREF.
7488  *
7489  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7490  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7491  *      FULL_BACKREF set.
7492  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7493  *    if it happened after the relocation occurred since we'll have dropped the
7494  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7495  *    have no real way to know for sure.
7496  *
7497  * We process the blocks one root at a time, and we start from the lowest root
7498  * objectid and go to the highest.  So we can just lookup the owner backref for
7499  * the record and if we don't find it then we know it doesn't exist and we have
7500  * a FULL BACKREF.
7501  *
7502  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7503  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7504  * be set or not and then we can check later once we've gathered all the refs.
7505  */
7506 static int calc_extent_flag(struct cache_tree *extent_cache,
7507                            struct extent_buffer *buf,
7508                            struct root_item_record *ri,
7509                            u64 *flags)
7510 {
7511         struct extent_record *rec;
7512         struct cache_extent *cache;
7513         struct tree_backref *tback;
7514         u64 owner = 0;
7515
7516         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7517         /* we have added this extent before */
7518         if (!cache)
7519                 return -ENOENT;
7520
7521         rec = container_of(cache, struct extent_record, cache);
7522
7523         /*
7524          * Except file/reloc tree, we can not have
7525          * FULL BACKREF MODE
7526          */
7527         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7528                 goto normal;
7529         /*
7530          * root node
7531          */
7532         if (buf->start == ri->bytenr)
7533                 goto normal;
7534
7535         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7536                 goto full_backref;
7537
7538         owner = btrfs_header_owner(buf);
7539         if (owner == ri->objectid)
7540                 goto normal;
7541
7542         tback = find_tree_backref(rec, 0, owner);
7543         if (!tback)
7544                 goto full_backref;
7545 normal:
7546         *flags = 0;
7547         if (rec->flag_block_full_backref != FLAG_UNSET &&
7548             rec->flag_block_full_backref != 0)
7549                 rec->bad_full_backref = 1;
7550         return 0;
7551 full_backref:
7552         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7553         if (rec->flag_block_full_backref != FLAG_UNSET &&
7554             rec->flag_block_full_backref != 1)
7555                 rec->bad_full_backref = 1;
7556         return 0;
7557 }
7558
7559 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7560 {
7561         fprintf(stderr, "Invalid key type(");
7562         print_key_type(stderr, 0, key_type);
7563         fprintf(stderr, ") found in root(");
7564         print_objectid(stderr, rootid, 0);
7565         fprintf(stderr, ")\n");
7566 }
7567
7568 /*
7569  * Check if the key is valid with its extent buffer.
7570  *
7571  * This is a early check in case invalid key exists in a extent buffer
7572  * This is not comprehensive yet, but should prevent wrong key/item passed
7573  * further
7574  */
7575 static int check_type_with_root(u64 rootid, u8 key_type)
7576 {
7577         switch (key_type) {
7578         /* Only valid in chunk tree */
7579         case BTRFS_DEV_ITEM_KEY:
7580         case BTRFS_CHUNK_ITEM_KEY:
7581                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7582                         goto err;
7583                 break;
7584         /* valid in csum and log tree */
7585         case BTRFS_CSUM_TREE_OBJECTID:
7586                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7587                       is_fstree(rootid)))
7588                         goto err;
7589                 break;
7590         case BTRFS_EXTENT_ITEM_KEY:
7591         case BTRFS_METADATA_ITEM_KEY:
7592         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7593                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7594                         goto err;
7595                 break;
7596         case BTRFS_ROOT_ITEM_KEY:
7597                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7598                         goto err;
7599                 break;
7600         case BTRFS_DEV_EXTENT_KEY:
7601                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7602                         goto err;
7603                 break;
7604         }
7605         return 0;
7606 err:
7607         report_mismatch_key_root(key_type, rootid);
7608         return -EINVAL;
7609 }
7610
7611 static int run_next_block(struct btrfs_root *root,
7612                           struct block_info *bits,
7613                           int bits_nr,
7614                           u64 *last,
7615                           struct cache_tree *pending,
7616                           struct cache_tree *seen,
7617                           struct cache_tree *reada,
7618                           struct cache_tree *nodes,
7619                           struct cache_tree *extent_cache,
7620                           struct cache_tree *chunk_cache,
7621                           struct rb_root *dev_cache,
7622                           struct block_group_tree *block_group_cache,
7623                           struct device_extent_tree *dev_extent_cache,
7624                           struct root_item_record *ri)
7625 {
7626         struct btrfs_fs_info *fs_info = root->fs_info;
7627         struct extent_buffer *buf;
7628         struct extent_record *rec = NULL;
7629         u64 bytenr;
7630         u32 size;
7631         u64 parent;
7632         u64 owner;
7633         u64 flags;
7634         u64 ptr;
7635         u64 gen = 0;
7636         int ret = 0;
7637         int i;
7638         int nritems;
7639         struct btrfs_key key;
7640         struct cache_extent *cache;
7641         int reada_bits;
7642
7643         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7644                                     bits_nr, &reada_bits);
7645         if (nritems == 0)
7646                 return 1;
7647
7648         if (!reada_bits) {
7649                 for(i = 0; i < nritems; i++) {
7650                         ret = add_cache_extent(reada, bits[i].start,
7651                                                bits[i].size);
7652                         if (ret == -EEXIST)
7653                                 continue;
7654
7655                         /* fixme, get the parent transid */
7656                         readahead_tree_block(fs_info, bits[i].start,
7657                                              bits[i].size, 0);
7658                 }
7659         }
7660         *last = bits[0].start;
7661         bytenr = bits[0].start;
7662         size = bits[0].size;
7663
7664         cache = lookup_cache_extent(pending, bytenr, size);
7665         if (cache) {
7666                 remove_cache_extent(pending, cache);
7667                 free(cache);
7668         }
7669         cache = lookup_cache_extent(reada, bytenr, size);
7670         if (cache) {
7671                 remove_cache_extent(reada, cache);
7672                 free(cache);
7673         }
7674         cache = lookup_cache_extent(nodes, bytenr, size);
7675         if (cache) {
7676                 remove_cache_extent(nodes, cache);
7677                 free(cache);
7678         }
7679         cache = lookup_cache_extent(extent_cache, bytenr, size);
7680         if (cache) {
7681                 rec = container_of(cache, struct extent_record, cache);
7682                 gen = rec->parent_generation;
7683         }
7684
7685         /* fixme, get the real parent transid */
7686         buf = read_tree_block(root->fs_info, bytenr, size, gen);
7687         if (!extent_buffer_uptodate(buf)) {
7688                 record_bad_block_io(root->fs_info,
7689                                     extent_cache, bytenr, size);
7690                 goto out;
7691         }
7692
7693         nritems = btrfs_header_nritems(buf);
7694
7695         flags = 0;
7696         if (!init_extent_tree) {
7697                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7698                                        btrfs_header_level(buf), 1, NULL,
7699                                        &flags);
7700                 if (ret < 0) {
7701                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7702                         if (ret < 0) {
7703                                 fprintf(stderr, "Couldn't calc extent flags\n");
7704                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7705                         }
7706                 }
7707         } else {
7708                 flags = 0;
7709                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7710                 if (ret < 0) {
7711                         fprintf(stderr, "Couldn't calc extent flags\n");
7712                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7713                 }
7714         }
7715
7716         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7717                 if (ri != NULL &&
7718                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7719                     ri->objectid == btrfs_header_owner(buf)) {
7720                         /*
7721                          * Ok we got to this block from it's original owner and
7722                          * we have FULL_BACKREF set.  Relocation can leave
7723                          * converted blocks over so this is altogether possible,
7724                          * however it's not possible if the generation > the
7725                          * last snapshot, so check for this case.
7726                          */
7727                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7728                             btrfs_header_generation(buf) > ri->last_snapshot) {
7729                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7730                                 rec->bad_full_backref = 1;
7731                         }
7732                 }
7733         } else {
7734                 if (ri != NULL &&
7735                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7736                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7737                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7738                         rec->bad_full_backref = 1;
7739                 }
7740         }
7741
7742         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7743                 rec->flag_block_full_backref = 1;
7744                 parent = bytenr;
7745                 owner = 0;
7746         } else {
7747                 rec->flag_block_full_backref = 0;
7748                 parent = 0;
7749                 owner = btrfs_header_owner(buf);
7750         }
7751
7752         ret = check_block(root, extent_cache, buf, flags);
7753         if (ret)
7754                 goto out;
7755
7756         if (btrfs_is_leaf(buf)) {
7757                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7758                 for (i = 0; i < nritems; i++) {
7759                         struct btrfs_file_extent_item *fi;
7760                         btrfs_item_key_to_cpu(buf, &key, i);
7761                         /*
7762                          * Check key type against the leaf owner.
7763                          * Could filter quite a lot of early error if
7764                          * owner is correct
7765                          */
7766                         if (check_type_with_root(btrfs_header_owner(buf),
7767                                                  key.type)) {
7768                                 fprintf(stderr, "ignoring invalid key\n");
7769                                 continue;
7770                         }
7771                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7772                                 process_extent_item(root, extent_cache, buf,
7773                                                     i);
7774                                 continue;
7775                         }
7776                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7777                                 process_extent_item(root, extent_cache, buf,
7778                                                     i);
7779                                 continue;
7780                         }
7781                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7782                                 total_csum_bytes +=
7783                                         btrfs_item_size_nr(buf, i);
7784                                 continue;
7785                         }
7786                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7787                                 process_chunk_item(chunk_cache, &key, buf, i);
7788                                 continue;
7789                         }
7790                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7791                                 process_device_item(dev_cache, &key, buf, i);
7792                                 continue;
7793                         }
7794                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7795                                 process_block_group_item(block_group_cache,
7796                                         &key, buf, i);
7797                                 continue;
7798                         }
7799                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7800                                 process_device_extent_item(dev_extent_cache,
7801                                         &key, buf, i);
7802                                 continue;
7803
7804                         }
7805                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7806 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7807                                 process_extent_ref_v0(extent_cache, buf, i);
7808 #else
7809                                 BUG();
7810 #endif
7811                                 continue;
7812                         }
7813
7814                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7815                                 ret = add_tree_backref(extent_cache,
7816                                                 key.objectid, 0, key.offset, 0);
7817                                 if (ret < 0)
7818                                         error(
7819                                 "add_tree_backref failed (leaf tree block): %s",
7820                                               strerror(-ret));
7821                                 continue;
7822                         }
7823                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7824                                 ret = add_tree_backref(extent_cache,
7825                                                 key.objectid, key.offset, 0, 0);
7826                                 if (ret < 0)
7827                                         error(
7828                                 "add_tree_backref failed (leaf shared block): %s",
7829                                               strerror(-ret));
7830                                 continue;
7831                         }
7832                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7833                                 struct btrfs_extent_data_ref *ref;
7834                                 ref = btrfs_item_ptr(buf, i,
7835                                                 struct btrfs_extent_data_ref);
7836                                 add_data_backref(extent_cache,
7837                                         key.objectid, 0,
7838                                         btrfs_extent_data_ref_root(buf, ref),
7839                                         btrfs_extent_data_ref_objectid(buf,
7840                                                                        ref),
7841                                         btrfs_extent_data_ref_offset(buf, ref),
7842                                         btrfs_extent_data_ref_count(buf, ref),
7843                                         0, root->fs_info->sectorsize);
7844                                 continue;
7845                         }
7846                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7847                                 struct btrfs_shared_data_ref *ref;
7848                                 ref = btrfs_item_ptr(buf, i,
7849                                                 struct btrfs_shared_data_ref);
7850                                 add_data_backref(extent_cache,
7851                                         key.objectid, key.offset, 0, 0, 0,
7852                                         btrfs_shared_data_ref_count(buf, ref),
7853                                         0, root->fs_info->sectorsize);
7854                                 continue;
7855                         }
7856                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7857                                 struct bad_item *bad;
7858
7859                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7860                                         continue;
7861                                 if (!owner)
7862                                         continue;
7863                                 bad = malloc(sizeof(struct bad_item));
7864                                 if (!bad)
7865                                         continue;
7866                                 INIT_LIST_HEAD(&bad->list);
7867                                 memcpy(&bad->key, &key,
7868                                        sizeof(struct btrfs_key));
7869                                 bad->root_id = owner;
7870                                 list_add_tail(&bad->list, &delete_items);
7871                                 continue;
7872                         }
7873                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7874                                 continue;
7875                         fi = btrfs_item_ptr(buf, i,
7876                                             struct btrfs_file_extent_item);
7877                         if (btrfs_file_extent_type(buf, fi) ==
7878                             BTRFS_FILE_EXTENT_INLINE)
7879                                 continue;
7880                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7881                                 continue;
7882
7883                         data_bytes_allocated +=
7884                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7885                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7886                                 abort();
7887                         }
7888                         data_bytes_referenced +=
7889                                 btrfs_file_extent_num_bytes(buf, fi);
7890                         add_data_backref(extent_cache,
7891                                 btrfs_file_extent_disk_bytenr(buf, fi),
7892                                 parent, owner, key.objectid, key.offset -
7893                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7894                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7895                 }
7896         } else {
7897                 int level;
7898                 struct btrfs_key first_key;
7899
7900                 first_key.objectid = 0;
7901
7902                 if (nritems > 0)
7903                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7904                 level = btrfs_header_level(buf);
7905                 for (i = 0; i < nritems; i++) {
7906                         struct extent_record tmpl;
7907
7908                         ptr = btrfs_node_blockptr(buf, i);
7909                         size = root->fs_info->nodesize;
7910                         btrfs_node_key_to_cpu(buf, &key, i);
7911                         if (ri != NULL) {
7912                                 if ((level == ri->drop_level)
7913                                     && is_dropped_key(&key, &ri->drop_key)) {
7914                                         continue;
7915                                 }
7916                         }
7917
7918                         memset(&tmpl, 0, sizeof(tmpl));
7919                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7920                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7921                         tmpl.start = ptr;
7922                         tmpl.nr = size;
7923                         tmpl.refs = 1;
7924                         tmpl.metadata = 1;
7925                         tmpl.max_size = size;
7926                         ret = add_extent_rec(extent_cache, &tmpl);
7927                         if (ret < 0)
7928                                 goto out;
7929
7930                         ret = add_tree_backref(extent_cache, ptr, parent,
7931                                         owner, 1);
7932                         if (ret < 0) {
7933                                 error(
7934                                 "add_tree_backref failed (non-leaf block): %s",
7935                                       strerror(-ret));
7936                                 continue;
7937                         }
7938
7939                         if (level > 1) {
7940                                 add_pending(nodes, seen, ptr, size);
7941                         } else {
7942                                 add_pending(pending, seen, ptr, size);
7943                         }
7944                 }
7945                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7946                                       nritems) * sizeof(struct btrfs_key_ptr);
7947         }
7948         total_btree_bytes += buf->len;
7949         if (fs_root_objectid(btrfs_header_owner(buf)))
7950                 total_fs_tree_bytes += buf->len;
7951         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7952                 total_extent_tree_bytes += buf->len;
7953         if (!found_old_backref &&
7954             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7955             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7956             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7957                 found_old_backref = 1;
7958 out:
7959         free_extent_buffer(buf);
7960         return ret;
7961 }
7962
7963 static int add_root_to_pending(struct extent_buffer *buf,
7964                                struct cache_tree *extent_cache,
7965                                struct cache_tree *pending,
7966                                struct cache_tree *seen,
7967                                struct cache_tree *nodes,
7968                                u64 objectid)
7969 {
7970         struct extent_record tmpl;
7971         int ret;
7972
7973         if (btrfs_header_level(buf) > 0)
7974                 add_pending(nodes, seen, buf->start, buf->len);
7975         else
7976                 add_pending(pending, seen, buf->start, buf->len);
7977
7978         memset(&tmpl, 0, sizeof(tmpl));
7979         tmpl.start = buf->start;
7980         tmpl.nr = buf->len;
7981         tmpl.is_root = 1;
7982         tmpl.refs = 1;
7983         tmpl.metadata = 1;
7984         tmpl.max_size = buf->len;
7985         add_extent_rec(extent_cache, &tmpl);
7986
7987         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7988             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7989                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7990                                 0, 1);
7991         else
7992                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7993                                 1);
7994         return ret;
7995 }
7996
7997 /* as we fix the tree, we might be deleting blocks that
7998  * we're tracking for repair.  This hook makes sure we
7999  * remove any backrefs for blocks as we are fixing them.
8000  */
8001 static int free_extent_hook(struct btrfs_trans_handle *trans,
8002                             struct btrfs_root *root,
8003                             u64 bytenr, u64 num_bytes, u64 parent,
8004                             u64 root_objectid, u64 owner, u64 offset,
8005                             int refs_to_drop)
8006 {
8007         struct extent_record *rec;
8008         struct cache_extent *cache;
8009         int is_data;
8010         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8011
8012         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8013         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8014         if (!cache)
8015                 return 0;
8016
8017         rec = container_of(cache, struct extent_record, cache);
8018         if (is_data) {
8019                 struct data_backref *back;
8020                 back = find_data_backref(rec, parent, root_objectid, owner,
8021                                          offset, 1, bytenr, num_bytes);
8022                 if (!back)
8023                         goto out;
8024                 if (back->node.found_ref) {
8025                         back->found_ref -= refs_to_drop;
8026                         if (rec->refs)
8027                                 rec->refs -= refs_to_drop;
8028                 }
8029                 if (back->node.found_extent_tree) {
8030                         back->num_refs -= refs_to_drop;
8031                         if (rec->extent_item_refs)
8032                                 rec->extent_item_refs -= refs_to_drop;
8033                 }
8034                 if (back->found_ref == 0)
8035                         back->node.found_ref = 0;
8036                 if (back->num_refs == 0)
8037                         back->node.found_extent_tree = 0;
8038
8039                 if (!back->node.found_extent_tree && back->node.found_ref) {
8040                         list_del(&back->node.list);
8041                         free(back);
8042                 }
8043         } else {
8044                 struct tree_backref *back;
8045                 back = find_tree_backref(rec, parent, root_objectid);
8046                 if (!back)
8047                         goto out;
8048                 if (back->node.found_ref) {
8049                         if (rec->refs)
8050                                 rec->refs--;
8051                         back->node.found_ref = 0;
8052                 }
8053                 if (back->node.found_extent_tree) {
8054                         if (rec->extent_item_refs)
8055                                 rec->extent_item_refs--;
8056                         back->node.found_extent_tree = 0;
8057                 }
8058                 if (!back->node.found_extent_tree && back->node.found_ref) {
8059                         list_del(&back->node.list);
8060                         free(back);
8061                 }
8062         }
8063         maybe_free_extent_rec(extent_cache, rec);
8064 out:
8065         return 0;
8066 }
8067
8068 static int delete_extent_records(struct btrfs_trans_handle *trans,
8069                                  struct btrfs_root *root,
8070                                  struct btrfs_path *path,
8071                                  u64 bytenr)
8072 {
8073         struct btrfs_key key;
8074         struct btrfs_key found_key;
8075         struct extent_buffer *leaf;
8076         int ret;
8077         int slot;
8078
8079
8080         key.objectid = bytenr;
8081         key.type = (u8)-1;
8082         key.offset = (u64)-1;
8083
8084         while(1) {
8085                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8086                                         &key, path, 0, 1);
8087                 if (ret < 0)
8088                         break;
8089
8090                 if (ret > 0) {
8091                         ret = 0;
8092                         if (path->slots[0] == 0)
8093                                 break;
8094                         path->slots[0]--;
8095                 }
8096                 ret = 0;
8097
8098                 leaf = path->nodes[0];
8099                 slot = path->slots[0];
8100
8101                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8102                 if (found_key.objectid != bytenr)
8103                         break;
8104
8105                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8106                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8107                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8108                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8109                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8110                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8111                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8112                         btrfs_release_path(path);
8113                         if (found_key.type == 0) {
8114                                 if (found_key.offset == 0)
8115                                         break;
8116                                 key.offset = found_key.offset - 1;
8117                                 key.type = found_key.type;
8118                         }
8119                         key.type = found_key.type - 1;
8120                         key.offset = (u64)-1;
8121                         continue;
8122                 }
8123
8124                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8125                         found_key.objectid, found_key.type, found_key.offset);
8126
8127                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8128                 if (ret)
8129                         break;
8130                 btrfs_release_path(path);
8131
8132                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8133                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8134                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8135                                 found_key.offset : root->fs_info->nodesize;
8136
8137                         ret = btrfs_update_block_group(trans, root, bytenr,
8138                                                        bytes, 0, 0);
8139                         if (ret)
8140                                 break;
8141                 }
8142         }
8143
8144         btrfs_release_path(path);
8145         return ret;
8146 }
8147
8148 /*
8149  * for a single backref, this will allocate a new extent
8150  * and add the backref to it.
8151  */
8152 static int record_extent(struct btrfs_trans_handle *trans,
8153                          struct btrfs_fs_info *info,
8154                          struct btrfs_path *path,
8155                          struct extent_record *rec,
8156                          struct extent_backref *back,
8157                          int allocated, u64 flags)
8158 {
8159         int ret = 0;
8160         struct btrfs_root *extent_root = info->extent_root;
8161         struct extent_buffer *leaf;
8162         struct btrfs_key ins_key;
8163         struct btrfs_extent_item *ei;
8164         struct data_backref *dback;
8165         struct btrfs_tree_block_info *bi;
8166
8167         if (!back->is_data)
8168                 rec->max_size = max_t(u64, rec->max_size,
8169                                     info->nodesize);
8170
8171         if (!allocated) {
8172                 u32 item_size = sizeof(*ei);
8173
8174                 if (!back->is_data)
8175                         item_size += sizeof(*bi);
8176
8177                 ins_key.objectid = rec->start;
8178                 ins_key.offset = rec->max_size;
8179                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8180
8181                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8182                                         &ins_key, item_size);
8183                 if (ret)
8184                         goto fail;
8185
8186                 leaf = path->nodes[0];
8187                 ei = btrfs_item_ptr(leaf, path->slots[0],
8188                                     struct btrfs_extent_item);
8189
8190                 btrfs_set_extent_refs(leaf, ei, 0);
8191                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8192
8193                 if (back->is_data) {
8194                         btrfs_set_extent_flags(leaf, ei,
8195                                                BTRFS_EXTENT_FLAG_DATA);
8196                 } else {
8197                         struct btrfs_disk_key copy_key;;
8198
8199                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8200                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8201                                              sizeof(*bi));
8202
8203                         btrfs_set_disk_key_objectid(&copy_key,
8204                                                     rec->info_objectid);
8205                         btrfs_set_disk_key_type(&copy_key, 0);
8206                         btrfs_set_disk_key_offset(&copy_key, 0);
8207
8208                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8209                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8210
8211                         btrfs_set_extent_flags(leaf, ei,
8212                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8213                 }
8214
8215                 btrfs_mark_buffer_dirty(leaf);
8216                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8217                                                rec->max_size, 1, 0);
8218                 if (ret)
8219                         goto fail;
8220                 btrfs_release_path(path);
8221         }
8222
8223         if (back->is_data) {
8224                 u64 parent;
8225                 int i;
8226
8227                 dback = to_data_backref(back);
8228                 if (back->full_backref)
8229                         parent = dback->parent;
8230                 else
8231                         parent = 0;
8232
8233                 for (i = 0; i < dback->found_ref; i++) {
8234                         /* if parent != 0, we're doing a full backref
8235                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8236                          * just makes the backref allocator create a data
8237                          * backref
8238                          */
8239                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8240                                                    rec->start, rec->max_size,
8241                                                    parent,
8242                                                    dback->root,
8243                                                    parent ?
8244                                                    BTRFS_FIRST_FREE_OBJECTID :
8245                                                    dback->owner,
8246                                                    dback->offset);
8247                         if (ret)
8248                                 break;
8249                 }
8250                 fprintf(stderr, "adding new data backref"
8251                                 " on %llu %s %llu owner %llu"
8252                                 " offset %llu found %d\n",
8253                                 (unsigned long long)rec->start,
8254                                 back->full_backref ?
8255                                 "parent" : "root",
8256                                 back->full_backref ?
8257                                 (unsigned long long)parent :
8258                                 (unsigned long long)dback->root,
8259                                 (unsigned long long)dback->owner,
8260                                 (unsigned long long)dback->offset,
8261                                 dback->found_ref);
8262         } else {
8263                 u64 parent;
8264                 struct tree_backref *tback;
8265
8266                 tback = to_tree_backref(back);
8267                 if (back->full_backref)
8268                         parent = tback->parent;
8269                 else
8270                         parent = 0;
8271
8272                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8273                                            rec->start, rec->max_size,
8274                                            parent, tback->root, 0, 0);
8275                 fprintf(stderr, "adding new tree backref on "
8276                         "start %llu len %llu parent %llu root %llu\n",
8277                         rec->start, rec->max_size, parent, tback->root);
8278         }
8279 fail:
8280         btrfs_release_path(path);
8281         return ret;
8282 }
8283
8284 static struct extent_entry *find_entry(struct list_head *entries,
8285                                        u64 bytenr, u64 bytes)
8286 {
8287         struct extent_entry *entry = NULL;
8288
8289         list_for_each_entry(entry, entries, list) {
8290                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8291                         return entry;
8292         }
8293
8294         return NULL;
8295 }
8296
8297 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8298 {
8299         struct extent_entry *entry, *best = NULL, *prev = NULL;
8300
8301         list_for_each_entry(entry, entries, list) {
8302                 /*
8303                  * If there are as many broken entries as entries then we know
8304                  * not to trust this particular entry.
8305                  */
8306                 if (entry->broken == entry->count)
8307                         continue;
8308
8309                 /*
8310                  * Special case, when there are only two entries and 'best' is
8311                  * the first one
8312                  */
8313                 if (!prev) {
8314                         best = entry;
8315                         prev = entry;
8316                         continue;
8317                 }
8318
8319                 /*
8320                  * If our current entry == best then we can't be sure our best
8321                  * is really the best, so we need to keep searching.
8322                  */
8323                 if (best && best->count == entry->count) {
8324                         prev = entry;
8325                         best = NULL;
8326                         continue;
8327                 }
8328
8329                 /* Prev == entry, not good enough, have to keep searching */
8330                 if (!prev->broken && prev->count == entry->count)
8331                         continue;
8332
8333                 if (!best)
8334                         best = (prev->count > entry->count) ? prev : entry;
8335                 else if (best->count < entry->count)
8336                         best = entry;
8337                 prev = entry;
8338         }
8339
8340         return best;
8341 }
8342
8343 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8344                       struct data_backref *dback, struct extent_entry *entry)
8345 {
8346         struct btrfs_trans_handle *trans;
8347         struct btrfs_root *root;
8348         struct btrfs_file_extent_item *fi;
8349         struct extent_buffer *leaf;
8350         struct btrfs_key key;
8351         u64 bytenr, bytes;
8352         int ret, err;
8353
8354         key.objectid = dback->root;
8355         key.type = BTRFS_ROOT_ITEM_KEY;
8356         key.offset = (u64)-1;
8357         root = btrfs_read_fs_root(info, &key);
8358         if (IS_ERR(root)) {
8359                 fprintf(stderr, "Couldn't find root for our ref\n");
8360                 return -EINVAL;
8361         }
8362
8363         /*
8364          * The backref points to the original offset of the extent if it was
8365          * split, so we need to search down to the offset we have and then walk
8366          * forward until we find the backref we're looking for.
8367          */
8368         key.objectid = dback->owner;
8369         key.type = BTRFS_EXTENT_DATA_KEY;
8370         key.offset = dback->offset;
8371         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8372         if (ret < 0) {
8373                 fprintf(stderr, "Error looking up ref %d\n", ret);
8374                 return ret;
8375         }
8376
8377         while (1) {
8378                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8379                         ret = btrfs_next_leaf(root, path);
8380                         if (ret) {
8381                                 fprintf(stderr, "Couldn't find our ref, next\n");
8382                                 return -EINVAL;
8383                         }
8384                 }
8385                 leaf = path->nodes[0];
8386                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8387                 if (key.objectid != dback->owner ||
8388                     key.type != BTRFS_EXTENT_DATA_KEY) {
8389                         fprintf(stderr, "Couldn't find our ref, search\n");
8390                         return -EINVAL;
8391                 }
8392                 fi = btrfs_item_ptr(leaf, path->slots[0],
8393                                     struct btrfs_file_extent_item);
8394                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8395                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8396
8397                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8398                         break;
8399                 path->slots[0]++;
8400         }
8401
8402         btrfs_release_path(path);
8403
8404         trans = btrfs_start_transaction(root, 1);
8405         if (IS_ERR(trans))
8406                 return PTR_ERR(trans);
8407
8408         /*
8409          * Ok we have the key of the file extent we want to fix, now we can cow
8410          * down to the thing and fix it.
8411          */
8412         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8413         if (ret < 0) {
8414                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8415                         key.objectid, key.type, key.offset, ret);
8416                 goto out;
8417         }
8418         if (ret > 0) {
8419                 fprintf(stderr, "Well that's odd, we just found this key "
8420                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8421                         key.offset);
8422                 ret = -EINVAL;
8423                 goto out;
8424         }
8425         leaf = path->nodes[0];
8426         fi = btrfs_item_ptr(leaf, path->slots[0],
8427                             struct btrfs_file_extent_item);
8428
8429         if (btrfs_file_extent_compression(leaf, fi) &&
8430             dback->disk_bytenr != entry->bytenr) {
8431                 fprintf(stderr, "Ref doesn't match the record start and is "
8432                         "compressed, please take a btrfs-image of this file "
8433                         "system and send it to a btrfs developer so they can "
8434                         "complete this functionality for bytenr %Lu\n",
8435                         dback->disk_bytenr);
8436                 ret = -EINVAL;
8437                 goto out;
8438         }
8439
8440         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8441                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8442         } else if (dback->disk_bytenr > entry->bytenr) {
8443                 u64 off_diff, offset;
8444
8445                 off_diff = dback->disk_bytenr - entry->bytenr;
8446                 offset = btrfs_file_extent_offset(leaf, fi);
8447                 if (dback->disk_bytenr + offset +
8448                     btrfs_file_extent_num_bytes(leaf, fi) >
8449                     entry->bytenr + entry->bytes) {
8450                         fprintf(stderr, "Ref is past the entry end, please "
8451                                 "take a btrfs-image of this file system and "
8452                                 "send it to a btrfs developer, ref %Lu\n",
8453                                 dback->disk_bytenr);
8454                         ret = -EINVAL;
8455                         goto out;
8456                 }
8457                 offset += off_diff;
8458                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8459                 btrfs_set_file_extent_offset(leaf, fi, offset);
8460         } else if (dback->disk_bytenr < entry->bytenr) {
8461                 u64 offset;
8462
8463                 offset = btrfs_file_extent_offset(leaf, fi);
8464                 if (dback->disk_bytenr + offset < entry->bytenr) {
8465                         fprintf(stderr, "Ref is before the entry start, please"
8466                                 " take a btrfs-image of this file system and "
8467                                 "send it to a btrfs developer, ref %Lu\n",
8468                                 dback->disk_bytenr);
8469                         ret = -EINVAL;
8470                         goto out;
8471                 }
8472
8473                 offset += dback->disk_bytenr;
8474                 offset -= entry->bytenr;
8475                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8476                 btrfs_set_file_extent_offset(leaf, fi, offset);
8477         }
8478
8479         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8480
8481         /*
8482          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8483          * only do this if we aren't using compression, otherwise it's a
8484          * trickier case.
8485          */
8486         if (!btrfs_file_extent_compression(leaf, fi))
8487                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8488         else
8489                 printf("ram bytes may be wrong?\n");
8490         btrfs_mark_buffer_dirty(leaf);
8491 out:
8492         err = btrfs_commit_transaction(trans, root);
8493         btrfs_release_path(path);
8494         return ret ? ret : err;
8495 }
8496
8497 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8498                            struct extent_record *rec)
8499 {
8500         struct extent_backref *back;
8501         struct data_backref *dback;
8502         struct extent_entry *entry, *best = NULL;
8503         LIST_HEAD(entries);
8504         int nr_entries = 0;
8505         int broken_entries = 0;
8506         int ret = 0;
8507         short mismatch = 0;
8508
8509         /*
8510          * Metadata is easy and the backrefs should always agree on bytenr and
8511          * size, if not we've got bigger issues.
8512          */
8513         if (rec->metadata)
8514                 return 0;
8515
8516         list_for_each_entry(back, &rec->backrefs, list) {
8517                 if (back->full_backref || !back->is_data)
8518                         continue;
8519
8520                 dback = to_data_backref(back);
8521
8522                 /*
8523                  * We only pay attention to backrefs that we found a real
8524                  * backref for.
8525                  */
8526                 if (dback->found_ref == 0)
8527                         continue;
8528
8529                 /*
8530                  * For now we only catch when the bytes don't match, not the
8531                  * bytenr.  We can easily do this at the same time, but I want
8532                  * to have a fs image to test on before we just add repair
8533                  * functionality willy-nilly so we know we won't screw up the
8534                  * repair.
8535                  */
8536
8537                 entry = find_entry(&entries, dback->disk_bytenr,
8538                                    dback->bytes);
8539                 if (!entry) {
8540                         entry = malloc(sizeof(struct extent_entry));
8541                         if (!entry) {
8542                                 ret = -ENOMEM;
8543                                 goto out;
8544                         }
8545                         memset(entry, 0, sizeof(*entry));
8546                         entry->bytenr = dback->disk_bytenr;
8547                         entry->bytes = dback->bytes;
8548                         list_add_tail(&entry->list, &entries);
8549                         nr_entries++;
8550                 }
8551
8552                 /*
8553                  * If we only have on entry we may think the entries agree when
8554                  * in reality they don't so we have to do some extra checking.
8555                  */
8556                 if (dback->disk_bytenr != rec->start ||
8557                     dback->bytes != rec->nr || back->broken)
8558                         mismatch = 1;
8559
8560                 if (back->broken) {
8561                         entry->broken++;
8562                         broken_entries++;
8563                 }
8564
8565                 entry->count++;
8566         }
8567
8568         /* Yay all the backrefs agree, carry on good sir */
8569         if (nr_entries <= 1 && !mismatch)
8570                 goto out;
8571
8572         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8573                 "%Lu\n", rec->start);
8574
8575         /*
8576          * First we want to see if the backrefs can agree amongst themselves who
8577          * is right, so figure out which one of the entries has the highest
8578          * count.
8579          */
8580         best = find_most_right_entry(&entries);
8581
8582         /*
8583          * Ok so we may have an even split between what the backrefs think, so
8584          * this is where we use the extent ref to see what it thinks.
8585          */
8586         if (!best) {
8587                 entry = find_entry(&entries, rec->start, rec->nr);
8588                 if (!entry && (!broken_entries || !rec->found_rec)) {
8589                         fprintf(stderr, "Backrefs don't agree with each other "
8590                                 "and extent record doesn't agree with anybody,"
8591                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8592                                 rec->start, rec->nr);
8593                         ret = -EINVAL;
8594                         goto out;
8595                 } else if (!entry) {
8596                         /*
8597                          * Ok our backrefs were broken, we'll assume this is the
8598                          * correct value and add an entry for this range.
8599                          */
8600                         entry = malloc(sizeof(struct extent_entry));
8601                         if (!entry) {
8602                                 ret = -ENOMEM;
8603                                 goto out;
8604                         }
8605                         memset(entry, 0, sizeof(*entry));
8606                         entry->bytenr = rec->start;
8607                         entry->bytes = rec->nr;
8608                         list_add_tail(&entry->list, &entries);
8609                         nr_entries++;
8610                 }
8611                 entry->count++;
8612                 best = find_most_right_entry(&entries);
8613                 if (!best) {
8614                         fprintf(stderr, "Backrefs and extent record evenly "
8615                                 "split on who is right, this is going to "
8616                                 "require user input to fix bytenr %Lu bytes "
8617                                 "%Lu\n", rec->start, rec->nr);
8618                         ret = -EINVAL;
8619                         goto out;
8620                 }
8621         }
8622
8623         /*
8624          * I don't think this can happen currently as we'll abort() if we catch
8625          * this case higher up, but in case somebody removes that we still can't
8626          * deal with it properly here yet, so just bail out of that's the case.
8627          */
8628         if (best->bytenr != rec->start) {
8629                 fprintf(stderr, "Extent start and backref starts don't match, "
8630                         "please use btrfs-image on this file system and send "
8631                         "it to a btrfs developer so they can make fsck fix "
8632                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8633                         rec->start, rec->nr);
8634                 ret = -EINVAL;
8635                 goto out;
8636         }
8637
8638         /*
8639          * Ok great we all agreed on an extent record, let's go find the real
8640          * references and fix up the ones that don't match.
8641          */
8642         list_for_each_entry(back, &rec->backrefs, list) {
8643                 if (back->full_backref || !back->is_data)
8644                         continue;
8645
8646                 dback = to_data_backref(back);
8647
8648                 /*
8649                  * Still ignoring backrefs that don't have a real ref attached
8650                  * to them.
8651                  */
8652                 if (dback->found_ref == 0)
8653                         continue;
8654
8655                 if (dback->bytes == best->bytes &&
8656                     dback->disk_bytenr == best->bytenr)
8657                         continue;
8658
8659                 ret = repair_ref(info, path, dback, best);
8660                 if (ret)
8661                         goto out;
8662         }
8663
8664         /*
8665          * Ok we messed with the actual refs, which means we need to drop our
8666          * entire cache and go back and rescan.  I know this is a huge pain and
8667          * adds a lot of extra work, but it's the only way to be safe.  Once all
8668          * the backrefs agree we may not need to do anything to the extent
8669          * record itself.
8670          */
8671         ret = -EAGAIN;
8672 out:
8673         while (!list_empty(&entries)) {
8674                 entry = list_entry(entries.next, struct extent_entry, list);
8675                 list_del_init(&entry->list);
8676                 free(entry);
8677         }
8678         return ret;
8679 }
8680
8681 static int process_duplicates(struct cache_tree *extent_cache,
8682                               struct extent_record *rec)
8683 {
8684         struct extent_record *good, *tmp;
8685         struct cache_extent *cache;
8686         int ret;
8687
8688         /*
8689          * If we found a extent record for this extent then return, or if we
8690          * have more than one duplicate we are likely going to need to delete
8691          * something.
8692          */
8693         if (rec->found_rec || rec->num_duplicates > 1)
8694                 return 0;
8695
8696         /* Shouldn't happen but just in case */
8697         BUG_ON(!rec->num_duplicates);
8698
8699         /*
8700          * So this happens if we end up with a backref that doesn't match the
8701          * actual extent entry.  So either the backref is bad or the extent
8702          * entry is bad.  Either way we want to have the extent_record actually
8703          * reflect what we found in the extent_tree, so we need to take the
8704          * duplicate out and use that as the extent_record since the only way we
8705          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8706          */
8707         remove_cache_extent(extent_cache, &rec->cache);
8708
8709         good = to_extent_record(rec->dups.next);
8710         list_del_init(&good->list);
8711         INIT_LIST_HEAD(&good->backrefs);
8712         INIT_LIST_HEAD(&good->dups);
8713         good->cache.start = good->start;
8714         good->cache.size = good->nr;
8715         good->content_checked = 0;
8716         good->owner_ref_checked = 0;
8717         good->num_duplicates = 0;
8718         good->refs = rec->refs;
8719         list_splice_init(&rec->backrefs, &good->backrefs);
8720         while (1) {
8721                 cache = lookup_cache_extent(extent_cache, good->start,
8722                                             good->nr);
8723                 if (!cache)
8724                         break;
8725                 tmp = container_of(cache, struct extent_record, cache);
8726
8727                 /*
8728                  * If we find another overlapping extent and it's found_rec is
8729                  * set then it's a duplicate and we need to try and delete
8730                  * something.
8731                  */
8732                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8733                         if (list_empty(&good->list))
8734                                 list_add_tail(&good->list,
8735                                               &duplicate_extents);
8736                         good->num_duplicates += tmp->num_duplicates + 1;
8737                         list_splice_init(&tmp->dups, &good->dups);
8738                         list_del_init(&tmp->list);
8739                         list_add_tail(&tmp->list, &good->dups);
8740                         remove_cache_extent(extent_cache, &tmp->cache);
8741                         continue;
8742                 }
8743
8744                 /*
8745                  * Ok we have another non extent item backed extent rec, so lets
8746                  * just add it to this extent and carry on like we did above.
8747                  */
8748                 good->refs += tmp->refs;
8749                 list_splice_init(&tmp->backrefs, &good->backrefs);
8750                 remove_cache_extent(extent_cache, &tmp->cache);
8751                 free(tmp);
8752         }
8753         ret = insert_cache_extent(extent_cache, &good->cache);
8754         BUG_ON(ret);
8755         free(rec);
8756         return good->num_duplicates ? 0 : 1;
8757 }
8758
8759 static int delete_duplicate_records(struct btrfs_root *root,
8760                                     struct extent_record *rec)
8761 {
8762         struct btrfs_trans_handle *trans;
8763         LIST_HEAD(delete_list);
8764         struct btrfs_path path;
8765         struct extent_record *tmp, *good, *n;
8766         int nr_del = 0;
8767         int ret = 0, err;
8768         struct btrfs_key key;
8769
8770         btrfs_init_path(&path);
8771
8772         good = rec;
8773         /* Find the record that covers all of the duplicates. */
8774         list_for_each_entry(tmp, &rec->dups, list) {
8775                 if (good->start < tmp->start)
8776                         continue;
8777                 if (good->nr > tmp->nr)
8778                         continue;
8779
8780                 if (tmp->start + tmp->nr < good->start + good->nr) {
8781                         fprintf(stderr, "Ok we have overlapping extents that "
8782                                 "aren't completely covered by each other, this "
8783                                 "is going to require more careful thought.  "
8784                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8785                                 tmp->start, tmp->nr, good->start, good->nr);
8786                         abort();
8787                 }
8788                 good = tmp;
8789         }
8790
8791         if (good != rec)
8792                 list_add_tail(&rec->list, &delete_list);
8793
8794         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8795                 if (tmp == good)
8796                         continue;
8797                 list_move_tail(&tmp->list, &delete_list);
8798         }
8799
8800         root = root->fs_info->extent_root;
8801         trans = btrfs_start_transaction(root, 1);
8802         if (IS_ERR(trans)) {
8803                 ret = PTR_ERR(trans);
8804                 goto out;
8805         }
8806
8807         list_for_each_entry(tmp, &delete_list, list) {
8808                 if (tmp->found_rec == 0)
8809                         continue;
8810                 key.objectid = tmp->start;
8811                 key.type = BTRFS_EXTENT_ITEM_KEY;
8812                 key.offset = tmp->nr;
8813
8814                 /* Shouldn't happen but just in case */
8815                 if (tmp->metadata) {
8816                         fprintf(stderr, "Well this shouldn't happen, extent "
8817                                 "record overlaps but is metadata? "
8818                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8819                         abort();
8820                 }
8821
8822                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8823                 if (ret) {
8824                         if (ret > 0)
8825                                 ret = -EINVAL;
8826                         break;
8827                 }
8828                 ret = btrfs_del_item(trans, root, &path);
8829                 if (ret)
8830                         break;
8831                 btrfs_release_path(&path);
8832                 nr_del++;
8833         }
8834         err = btrfs_commit_transaction(trans, root);
8835         if (err && !ret)
8836                 ret = err;
8837 out:
8838         while (!list_empty(&delete_list)) {
8839                 tmp = to_extent_record(delete_list.next);
8840                 list_del_init(&tmp->list);
8841                 if (tmp == rec)
8842                         continue;
8843                 free(tmp);
8844         }
8845
8846         while (!list_empty(&rec->dups)) {
8847                 tmp = to_extent_record(rec->dups.next);
8848                 list_del_init(&tmp->list);
8849                 free(tmp);
8850         }
8851
8852         btrfs_release_path(&path);
8853
8854         if (!ret && !nr_del)
8855                 rec->num_duplicates = 0;
8856
8857         return ret ? ret : nr_del;
8858 }
8859
8860 static int find_possible_backrefs(struct btrfs_fs_info *info,
8861                                   struct btrfs_path *path,
8862                                   struct cache_tree *extent_cache,
8863                                   struct extent_record *rec)
8864 {
8865         struct btrfs_root *root;
8866         struct extent_backref *back;
8867         struct data_backref *dback;
8868         struct cache_extent *cache;
8869         struct btrfs_file_extent_item *fi;
8870         struct btrfs_key key;
8871         u64 bytenr, bytes;
8872         int ret;
8873
8874         list_for_each_entry(back, &rec->backrefs, list) {
8875                 /* Don't care about full backrefs (poor unloved backrefs) */
8876                 if (back->full_backref || !back->is_data)
8877                         continue;
8878
8879                 dback = to_data_backref(back);
8880
8881                 /* We found this one, we don't need to do a lookup */
8882                 if (dback->found_ref)
8883                         continue;
8884
8885                 key.objectid = dback->root;
8886                 key.type = BTRFS_ROOT_ITEM_KEY;
8887                 key.offset = (u64)-1;
8888
8889                 root = btrfs_read_fs_root(info, &key);
8890
8891                 /* No root, definitely a bad ref, skip */
8892                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8893                         continue;
8894                 /* Other err, exit */
8895                 if (IS_ERR(root))
8896                         return PTR_ERR(root);
8897
8898                 key.objectid = dback->owner;
8899                 key.type = BTRFS_EXTENT_DATA_KEY;
8900                 key.offset = dback->offset;
8901                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8902                 if (ret) {
8903                         btrfs_release_path(path);
8904                         if (ret < 0)
8905                                 return ret;
8906                         /* Didn't find it, we can carry on */
8907                         ret = 0;
8908                         continue;
8909                 }
8910
8911                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8912                                     struct btrfs_file_extent_item);
8913                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8914                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8915                 btrfs_release_path(path);
8916                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8917                 if (cache) {
8918                         struct extent_record *tmp;
8919                         tmp = container_of(cache, struct extent_record, cache);
8920
8921                         /*
8922                          * If we found an extent record for the bytenr for this
8923                          * particular backref then we can't add it to our
8924                          * current extent record.  We only want to add backrefs
8925                          * that don't have a corresponding extent item in the
8926                          * extent tree since they likely belong to this record
8927                          * and we need to fix it if it doesn't match bytenrs.
8928                          */
8929                         if  (tmp->found_rec)
8930                                 continue;
8931                 }
8932
8933                 dback->found_ref += 1;
8934                 dback->disk_bytenr = bytenr;
8935                 dback->bytes = bytes;
8936
8937                 /*
8938                  * Set this so the verify backref code knows not to trust the
8939                  * values in this backref.
8940                  */
8941                 back->broken = 1;
8942         }
8943
8944         return 0;
8945 }
8946
8947 /*
8948  * Record orphan data ref into corresponding root.
8949  *
8950  * Return 0 if the extent item contains data ref and recorded.
8951  * Return 1 if the extent item contains no useful data ref
8952  *   On that case, it may contains only shared_dataref or metadata backref
8953  *   or the file extent exists(this should be handled by the extent bytenr
8954  *   recovery routine)
8955  * Return <0 if something goes wrong.
8956  */
8957 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8958                                       struct extent_record *rec)
8959 {
8960         struct btrfs_key key;
8961         struct btrfs_root *dest_root;
8962         struct extent_backref *back;
8963         struct data_backref *dback;
8964         struct orphan_data_extent *orphan;
8965         struct btrfs_path path;
8966         int recorded_data_ref = 0;
8967         int ret = 0;
8968
8969         if (rec->metadata)
8970                 return 1;
8971         btrfs_init_path(&path);
8972         list_for_each_entry(back, &rec->backrefs, list) {
8973                 if (back->full_backref || !back->is_data ||
8974                     !back->found_extent_tree)
8975                         continue;
8976                 dback = to_data_backref(back);
8977                 if (dback->found_ref)
8978                         continue;
8979                 key.objectid = dback->root;
8980                 key.type = BTRFS_ROOT_ITEM_KEY;
8981                 key.offset = (u64)-1;
8982
8983                 dest_root = btrfs_read_fs_root(fs_info, &key);
8984
8985                 /* For non-exist root we just skip it */
8986                 if (IS_ERR(dest_root) || !dest_root)
8987                         continue;
8988
8989                 key.objectid = dback->owner;
8990                 key.type = BTRFS_EXTENT_DATA_KEY;
8991                 key.offset = dback->offset;
8992
8993                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8994                 btrfs_release_path(&path);
8995                 /*
8996                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8997                  * we need to record it for inode/file extent rebuild.
8998                  * For ret > 0, we record it only for file extent rebuild.
8999                  * For ret == 0, the file extent exists but only bytenr
9000                  * mismatch, let the original bytenr fix routine to handle,
9001                  * don't record it.
9002                  */
9003                 if (ret == 0)
9004                         continue;
9005                 ret = 0;
9006                 orphan = malloc(sizeof(*orphan));
9007                 if (!orphan) {
9008                         ret = -ENOMEM;
9009                         goto out;
9010                 }
9011                 INIT_LIST_HEAD(&orphan->list);
9012                 orphan->root = dback->root;
9013                 orphan->objectid = dback->owner;
9014                 orphan->offset = dback->offset;
9015                 orphan->disk_bytenr = rec->cache.start;
9016                 orphan->disk_len = rec->cache.size;
9017                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9018                 recorded_data_ref = 1;
9019         }
9020 out:
9021         btrfs_release_path(&path);
9022         if (!ret)
9023                 return !recorded_data_ref;
9024         else
9025                 return ret;
9026 }
9027
9028 /*
9029  * when an incorrect extent item is found, this will delete
9030  * all of the existing entries for it and recreate them
9031  * based on what the tree scan found.
9032  */
9033 static int fixup_extent_refs(struct btrfs_fs_info *info,
9034                              struct cache_tree *extent_cache,
9035                              struct extent_record *rec)
9036 {
9037         struct btrfs_trans_handle *trans = NULL;
9038         int ret;
9039         struct btrfs_path path;
9040         struct list_head *cur = rec->backrefs.next;
9041         struct cache_extent *cache;
9042         struct extent_backref *back;
9043         int allocated = 0;
9044         u64 flags = 0;
9045
9046         if (rec->flag_block_full_backref)
9047                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9048
9049         btrfs_init_path(&path);
9050         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9051                 /*
9052                  * Sometimes the backrefs themselves are so broken they don't
9053                  * get attached to any meaningful rec, so first go back and
9054                  * check any of our backrefs that we couldn't find and throw
9055                  * them into the list if we find the backref so that
9056                  * verify_backrefs can figure out what to do.
9057                  */
9058                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9059                 if (ret < 0)
9060                         goto out;
9061         }
9062
9063         /* step one, make sure all of the backrefs agree */
9064         ret = verify_backrefs(info, &path, rec);
9065         if (ret < 0)
9066                 goto out;
9067
9068         trans = btrfs_start_transaction(info->extent_root, 1);
9069         if (IS_ERR(trans)) {
9070                 ret = PTR_ERR(trans);
9071                 goto out;
9072         }
9073
9074         /* step two, delete all the existing records */
9075         ret = delete_extent_records(trans, info->extent_root, &path,
9076                                     rec->start);
9077
9078         if (ret < 0)
9079                 goto out;
9080
9081         /* was this block corrupt?  If so, don't add references to it */
9082         cache = lookup_cache_extent(info->corrupt_blocks,
9083                                     rec->start, rec->max_size);
9084         if (cache) {
9085                 ret = 0;
9086                 goto out;
9087         }
9088
9089         /* step three, recreate all the refs we did find */
9090         while(cur != &rec->backrefs) {
9091                 back = to_extent_backref(cur);
9092                 cur = cur->next;
9093
9094                 /*
9095                  * if we didn't find any references, don't create a
9096                  * new extent record
9097                  */
9098                 if (!back->found_ref)
9099                         continue;
9100
9101                 rec->bad_full_backref = 0;
9102                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9103                 allocated = 1;
9104
9105                 if (ret)
9106                         goto out;
9107         }
9108 out:
9109         if (trans) {
9110                 int err = btrfs_commit_transaction(trans, info->extent_root);
9111                 if (!ret)
9112                         ret = err;
9113         }
9114
9115         if (!ret)
9116                 fprintf(stderr, "Repaired extent references for %llu\n",
9117                                 (unsigned long long)rec->start);
9118
9119         btrfs_release_path(&path);
9120         return ret;
9121 }
9122
9123 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9124                               struct extent_record *rec)
9125 {
9126         struct btrfs_trans_handle *trans;
9127         struct btrfs_root *root = fs_info->extent_root;
9128         struct btrfs_path path;
9129         struct btrfs_extent_item *ei;
9130         struct btrfs_key key;
9131         u64 flags;
9132         int ret = 0;
9133
9134         key.objectid = rec->start;
9135         if (rec->metadata) {
9136                 key.type = BTRFS_METADATA_ITEM_KEY;
9137                 key.offset = rec->info_level;
9138         } else {
9139                 key.type = BTRFS_EXTENT_ITEM_KEY;
9140                 key.offset = rec->max_size;
9141         }
9142
9143         trans = btrfs_start_transaction(root, 0);
9144         if (IS_ERR(trans))
9145                 return PTR_ERR(trans);
9146
9147         btrfs_init_path(&path);
9148         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9149         if (ret < 0) {
9150                 btrfs_release_path(&path);
9151                 btrfs_commit_transaction(trans, root);
9152                 return ret;
9153         } else if (ret) {
9154                 fprintf(stderr, "Didn't find extent for %llu\n",
9155                         (unsigned long long)rec->start);
9156                 btrfs_release_path(&path);
9157                 btrfs_commit_transaction(trans, root);
9158                 return -ENOENT;
9159         }
9160
9161         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9162                             struct btrfs_extent_item);
9163         flags = btrfs_extent_flags(path.nodes[0], ei);
9164         if (rec->flag_block_full_backref) {
9165                 fprintf(stderr, "setting full backref on %llu\n",
9166                         (unsigned long long)key.objectid);
9167                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9168         } else {
9169                 fprintf(stderr, "clearing full backref on %llu\n",
9170                         (unsigned long long)key.objectid);
9171                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9172         }
9173         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9174         btrfs_mark_buffer_dirty(path.nodes[0]);
9175         btrfs_release_path(&path);
9176         ret = btrfs_commit_transaction(trans, root);
9177         if (!ret)
9178                 fprintf(stderr, "Repaired extent flags for %llu\n",
9179                                 (unsigned long long)rec->start);
9180
9181         return ret;
9182 }
9183
9184 /* right now we only prune from the extent allocation tree */
9185 static int prune_one_block(struct btrfs_trans_handle *trans,
9186                            struct btrfs_fs_info *info,
9187                            struct btrfs_corrupt_block *corrupt)
9188 {
9189         int ret;
9190         struct btrfs_path path;
9191         struct extent_buffer *eb;
9192         u64 found;
9193         int slot;
9194         int nritems;
9195         int level = corrupt->level + 1;
9196
9197         btrfs_init_path(&path);
9198 again:
9199         /* we want to stop at the parent to our busted block */
9200         path.lowest_level = level;
9201
9202         ret = btrfs_search_slot(trans, info->extent_root,
9203                                 &corrupt->key, &path, -1, 1);
9204
9205         if (ret < 0)
9206                 goto out;
9207
9208         eb = path.nodes[level];
9209         if (!eb) {
9210                 ret = -ENOENT;
9211                 goto out;
9212         }
9213
9214         /*
9215          * hopefully the search gave us the block we want to prune,
9216          * lets try that first
9217          */
9218         slot = path.slots[level];
9219         found =  btrfs_node_blockptr(eb, slot);
9220         if (found == corrupt->cache.start)
9221                 goto del_ptr;
9222
9223         nritems = btrfs_header_nritems(eb);
9224
9225         /* the search failed, lets scan this node and hope we find it */
9226         for (slot = 0; slot < nritems; slot++) {
9227                 found =  btrfs_node_blockptr(eb, slot);
9228                 if (found == corrupt->cache.start)
9229                         goto del_ptr;
9230         }
9231         /*
9232          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9233          * to this block
9234          */
9235         if (eb == info->extent_root->node) {
9236                 ret = -ENOENT;
9237                 goto out;
9238         } else {
9239                 level++;
9240                 btrfs_release_path(&path);
9241                 goto again;
9242         }
9243
9244 del_ptr:
9245         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9246         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9247
9248 out:
9249         btrfs_release_path(&path);
9250         return ret;
9251 }
9252
9253 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9254 {
9255         struct btrfs_trans_handle *trans = NULL;
9256         struct cache_extent *cache;
9257         struct btrfs_corrupt_block *corrupt;
9258
9259         while (1) {
9260                 cache = search_cache_extent(info->corrupt_blocks, 0);
9261                 if (!cache)
9262                         break;
9263                 if (!trans) {
9264                         trans = btrfs_start_transaction(info->extent_root, 1);
9265                         if (IS_ERR(trans))
9266                                 return PTR_ERR(trans);
9267                 }
9268                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9269                 prune_one_block(trans, info, corrupt);
9270                 remove_cache_extent(info->corrupt_blocks, cache);
9271         }
9272         if (trans)
9273                 return btrfs_commit_transaction(trans, info->extent_root);
9274         return 0;
9275 }
9276
9277 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9278 {
9279         struct btrfs_block_group_cache *cache;
9280         u64 start, end;
9281         int ret;
9282
9283         while (1) {
9284                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9285                                             &start, &end, EXTENT_DIRTY);
9286                 if (ret)
9287                         break;
9288                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9289         }
9290
9291         start = 0;
9292         while (1) {
9293                 cache = btrfs_lookup_first_block_group(fs_info, start);
9294                 if (!cache)
9295                         break;
9296                 if (cache->cached)
9297                         cache->cached = 0;
9298                 start = cache->key.objectid + cache->key.offset;
9299         }
9300 }
9301
9302 static int check_extent_refs(struct btrfs_root *root,
9303                              struct cache_tree *extent_cache)
9304 {
9305         struct extent_record *rec;
9306         struct cache_extent *cache;
9307         int ret = 0;
9308         int had_dups = 0;
9309
9310         if (repair) {
9311                 /*
9312                  * if we're doing a repair, we have to make sure
9313                  * we don't allocate from the problem extents.
9314                  * In the worst case, this will be all the
9315                  * extents in the FS
9316                  */
9317                 cache = search_cache_extent(extent_cache, 0);
9318                 while(cache) {
9319                         rec = container_of(cache, struct extent_record, cache);
9320                         set_extent_dirty(root->fs_info->excluded_extents,
9321                                          rec->start,
9322                                          rec->start + rec->max_size - 1);
9323                         cache = next_cache_extent(cache);
9324                 }
9325
9326                 /* pin down all the corrupted blocks too */
9327                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9328                 while(cache) {
9329                         set_extent_dirty(root->fs_info->excluded_extents,
9330                                          cache->start,
9331                                          cache->start + cache->size - 1);
9332                         cache = next_cache_extent(cache);
9333                 }
9334                 prune_corrupt_blocks(root->fs_info);
9335                 reset_cached_block_groups(root->fs_info);
9336         }
9337
9338         reset_cached_block_groups(root->fs_info);
9339
9340         /*
9341          * We need to delete any duplicate entries we find first otherwise we
9342          * could mess up the extent tree when we have backrefs that actually
9343          * belong to a different extent item and not the weird duplicate one.
9344          */
9345         while (repair && !list_empty(&duplicate_extents)) {
9346                 rec = to_extent_record(duplicate_extents.next);
9347                 list_del_init(&rec->list);
9348
9349                 /* Sometimes we can find a backref before we find an actual
9350                  * extent, so we need to process it a little bit to see if there
9351                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9352                  * if this is a backref screwup.  If we need to delete stuff
9353                  * process_duplicates() will return 0, otherwise it will return
9354                  * 1 and we
9355                  */
9356                 if (process_duplicates(extent_cache, rec))
9357                         continue;
9358                 ret = delete_duplicate_records(root, rec);
9359                 if (ret < 0)
9360                         return ret;
9361                 /*
9362                  * delete_duplicate_records will return the number of entries
9363                  * deleted, so if it's greater than 0 then we know we actually
9364                  * did something and we need to remove.
9365                  */
9366                 if (ret)
9367                         had_dups = 1;
9368         }
9369
9370         if (had_dups)
9371                 return -EAGAIN;
9372
9373         while(1) {
9374                 int cur_err = 0;
9375                 int fix = 0;
9376
9377                 cache = search_cache_extent(extent_cache, 0);
9378                 if (!cache)
9379                         break;
9380                 rec = container_of(cache, struct extent_record, cache);
9381                 if (rec->num_duplicates) {
9382                         fprintf(stderr, "extent item %llu has multiple extent "
9383                                 "items\n", (unsigned long long)rec->start);
9384                         cur_err = 1;
9385                 }
9386
9387                 if (rec->refs != rec->extent_item_refs) {
9388                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9389                                 (unsigned long long)rec->start,
9390                                 (unsigned long long)rec->nr);
9391                         fprintf(stderr, "extent item %llu, found %llu\n",
9392                                 (unsigned long long)rec->extent_item_refs,
9393                                 (unsigned long long)rec->refs);
9394                         ret = record_orphan_data_extents(root->fs_info, rec);
9395                         if (ret < 0)
9396                                 goto repair_abort;
9397                         fix = ret;
9398                         cur_err = 1;
9399                 }
9400                 if (all_backpointers_checked(rec, 1)) {
9401                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9402                                 (unsigned long long)rec->start,
9403                                 (unsigned long long)rec->nr);
9404                         fix = 1;
9405                         cur_err = 1;
9406                 }
9407                 if (!rec->owner_ref_checked) {
9408                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9409                                 (unsigned long long)rec->start,
9410                                 (unsigned long long)rec->nr);
9411                         fix = 1;
9412                         cur_err = 1;
9413                 }
9414
9415                 if (repair && fix) {
9416                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9417                         if (ret)
9418                                 goto repair_abort;
9419                 }
9420
9421
9422                 if (rec->bad_full_backref) {
9423                         fprintf(stderr, "bad full backref, on [%llu]\n",
9424                                 (unsigned long long)rec->start);
9425                         if (repair) {
9426                                 ret = fixup_extent_flags(root->fs_info, rec);
9427                                 if (ret)
9428                                         goto repair_abort;
9429                                 fix = 1;
9430                         }
9431                         cur_err = 1;
9432                 }
9433                 /*
9434                  * Although it's not a extent ref's problem, we reuse this
9435                  * routine for error reporting.
9436                  * No repair function yet.
9437                  */
9438                 if (rec->crossing_stripes) {
9439                         fprintf(stderr,
9440                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9441                                 rec->start, rec->start + rec->max_size);
9442                         cur_err = 1;
9443                 }
9444
9445                 if (rec->wrong_chunk_type) {
9446                         fprintf(stderr,
9447                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9448                                 rec->start, rec->start + rec->max_size);
9449                         cur_err = 1;
9450                 }
9451
9452                 remove_cache_extent(extent_cache, cache);
9453                 free_all_extent_backrefs(rec);
9454                 if (!init_extent_tree && repair && (!cur_err || fix))
9455                         clear_extent_dirty(root->fs_info->excluded_extents,
9456                                            rec->start,
9457                                            rec->start + rec->max_size - 1);
9458                 free(rec);
9459         }
9460 repair_abort:
9461         if (repair) {
9462                 if (ret && ret != -EAGAIN) {
9463                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9464                         exit(1);
9465                 } else if (!ret) {
9466                         struct btrfs_trans_handle *trans;
9467
9468                         root = root->fs_info->extent_root;
9469                         trans = btrfs_start_transaction(root, 1);
9470                         if (IS_ERR(trans)) {
9471                                 ret = PTR_ERR(trans);
9472                                 goto repair_abort;
9473                         }
9474
9475                         btrfs_fix_block_accounting(trans, root);
9476                         ret = btrfs_commit_transaction(trans, root);
9477                         if (ret)
9478                                 goto repair_abort;
9479                 }
9480                 return ret;
9481         }
9482         return 0;
9483 }
9484
9485 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9486 {
9487         u64 stripe_size;
9488
9489         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9490                 stripe_size = length;
9491                 stripe_size /= num_stripes;
9492         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9493                 stripe_size = length * 2;
9494                 stripe_size /= num_stripes;
9495         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9496                 stripe_size = length;
9497                 stripe_size /= (num_stripes - 1);
9498         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9499                 stripe_size = length;
9500                 stripe_size /= (num_stripes - 2);
9501         } else {
9502                 stripe_size = length;
9503         }
9504         return stripe_size;
9505 }
9506
9507 /*
9508  * Check the chunk with its block group/dev list ref:
9509  * Return 0 if all refs seems valid.
9510  * Return 1 if part of refs seems valid, need later check for rebuild ref
9511  * like missing block group and needs to search extent tree to rebuild them.
9512  * Return -1 if essential refs are missing and unable to rebuild.
9513  */
9514 static int check_chunk_refs(struct chunk_record *chunk_rec,
9515                             struct block_group_tree *block_group_cache,
9516                             struct device_extent_tree *dev_extent_cache,
9517                             int silent)
9518 {
9519         struct cache_extent *block_group_item;
9520         struct block_group_record *block_group_rec;
9521         struct cache_extent *dev_extent_item;
9522         struct device_extent_record *dev_extent_rec;
9523         u64 devid;
9524         u64 offset;
9525         u64 length;
9526         int metadump_v2 = 0;
9527         int i;
9528         int ret = 0;
9529
9530         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9531                                                chunk_rec->offset,
9532                                                chunk_rec->length);
9533         if (block_group_item) {
9534                 block_group_rec = container_of(block_group_item,
9535                                                struct block_group_record,
9536                                                cache);
9537                 if (chunk_rec->length != block_group_rec->offset ||
9538                     chunk_rec->offset != block_group_rec->objectid ||
9539                     (!metadump_v2 &&
9540                      chunk_rec->type_flags != block_group_rec->flags)) {
9541                         if (!silent)
9542                                 fprintf(stderr,
9543                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9544                                         chunk_rec->objectid,
9545                                         chunk_rec->type,
9546                                         chunk_rec->offset,
9547                                         chunk_rec->length,
9548                                         chunk_rec->offset,
9549                                         chunk_rec->type_flags,
9550                                         block_group_rec->objectid,
9551                                         block_group_rec->type,
9552                                         block_group_rec->offset,
9553                                         block_group_rec->offset,
9554                                         block_group_rec->objectid,
9555                                         block_group_rec->flags);
9556                         ret = -1;
9557                 } else {
9558                         list_del_init(&block_group_rec->list);
9559                         chunk_rec->bg_rec = block_group_rec;
9560                 }
9561         } else {
9562                 if (!silent)
9563                         fprintf(stderr,
9564                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9565                                 chunk_rec->objectid,
9566                                 chunk_rec->type,
9567                                 chunk_rec->offset,
9568                                 chunk_rec->length,
9569                                 chunk_rec->offset,
9570                                 chunk_rec->type_flags);
9571                 ret = 1;
9572         }
9573
9574         if (metadump_v2)
9575                 return ret;
9576
9577         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9578                                     chunk_rec->num_stripes);
9579         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9580                 devid = chunk_rec->stripes[i].devid;
9581                 offset = chunk_rec->stripes[i].offset;
9582                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9583                                                        devid, offset, length);
9584                 if (dev_extent_item) {
9585                         dev_extent_rec = container_of(dev_extent_item,
9586                                                 struct device_extent_record,
9587                                                 cache);
9588                         if (dev_extent_rec->objectid != devid ||
9589                             dev_extent_rec->offset != offset ||
9590                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9591                             dev_extent_rec->length != length) {
9592                                 if (!silent)
9593                                         fprintf(stderr,
9594                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9595                                                 chunk_rec->objectid,
9596                                                 chunk_rec->type,
9597                                                 chunk_rec->offset,
9598                                                 chunk_rec->stripes[i].devid,
9599                                                 chunk_rec->stripes[i].offset,
9600                                                 dev_extent_rec->objectid,
9601                                                 dev_extent_rec->offset,
9602                                                 dev_extent_rec->length);
9603                                 ret = -1;
9604                         } else {
9605                                 list_move(&dev_extent_rec->chunk_list,
9606                                           &chunk_rec->dextents);
9607                         }
9608                 } else {
9609                         if (!silent)
9610                                 fprintf(stderr,
9611                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9612                                         chunk_rec->objectid,
9613                                         chunk_rec->type,
9614                                         chunk_rec->offset,
9615                                         chunk_rec->stripes[i].devid,
9616                                         chunk_rec->stripes[i].offset);
9617                         ret = -1;
9618                 }
9619         }
9620         return ret;
9621 }
9622
9623 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9624 int check_chunks(struct cache_tree *chunk_cache,
9625                  struct block_group_tree *block_group_cache,
9626                  struct device_extent_tree *dev_extent_cache,
9627                  struct list_head *good, struct list_head *bad,
9628                  struct list_head *rebuild, int silent)
9629 {
9630         struct cache_extent *chunk_item;
9631         struct chunk_record *chunk_rec;
9632         struct block_group_record *bg_rec;
9633         struct device_extent_record *dext_rec;
9634         int err;
9635         int ret = 0;
9636
9637         chunk_item = first_cache_extent(chunk_cache);
9638         while (chunk_item) {
9639                 chunk_rec = container_of(chunk_item, struct chunk_record,
9640                                          cache);
9641                 err = check_chunk_refs(chunk_rec, block_group_cache,
9642                                        dev_extent_cache, silent);
9643                 if (err < 0)
9644                         ret = err;
9645                 if (err == 0 && good)
9646                         list_add_tail(&chunk_rec->list, good);
9647                 if (err > 0 && rebuild)
9648                         list_add_tail(&chunk_rec->list, rebuild);
9649                 if (err < 0 && bad)
9650                         list_add_tail(&chunk_rec->list, bad);
9651                 chunk_item = next_cache_extent(chunk_item);
9652         }
9653
9654         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9655                 if (!silent)
9656                         fprintf(stderr,
9657                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9658                                 bg_rec->objectid,
9659                                 bg_rec->offset,
9660                                 bg_rec->flags);
9661                 if (!ret)
9662                         ret = 1;
9663         }
9664
9665         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9666                             chunk_list) {
9667                 if (!silent)
9668                         fprintf(stderr,
9669                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9670                                 dext_rec->objectid,
9671                                 dext_rec->offset,
9672                                 dext_rec->length);
9673                 if (!ret)
9674                         ret = 1;
9675         }
9676         return ret;
9677 }
9678
9679
9680 static int check_device_used(struct device_record *dev_rec,
9681                              struct device_extent_tree *dext_cache)
9682 {
9683         struct cache_extent *cache;
9684         struct device_extent_record *dev_extent_rec;
9685         u64 total_byte = 0;
9686
9687         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9688         while (cache) {
9689                 dev_extent_rec = container_of(cache,
9690                                               struct device_extent_record,
9691                                               cache);
9692                 if (dev_extent_rec->objectid != dev_rec->devid)
9693                         break;
9694
9695                 list_del_init(&dev_extent_rec->device_list);
9696                 total_byte += dev_extent_rec->length;
9697                 cache = next_cache_extent(cache);
9698         }
9699
9700         if (total_byte != dev_rec->byte_used) {
9701                 fprintf(stderr,
9702                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9703                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9704                         dev_rec->type, dev_rec->offset);
9705                 return -1;
9706         } else {
9707                 return 0;
9708         }
9709 }
9710
9711 /* check btrfs_dev_item -> btrfs_dev_extent */
9712 static int check_devices(struct rb_root *dev_cache,
9713                          struct device_extent_tree *dev_extent_cache)
9714 {
9715         struct rb_node *dev_node;
9716         struct device_record *dev_rec;
9717         struct device_extent_record *dext_rec;
9718         int err;
9719         int ret = 0;
9720
9721         dev_node = rb_first(dev_cache);
9722         while (dev_node) {
9723                 dev_rec = container_of(dev_node, struct device_record, node);
9724                 err = check_device_used(dev_rec, dev_extent_cache);
9725                 if (err)
9726                         ret = err;
9727
9728                 dev_node = rb_next(dev_node);
9729         }
9730         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9731                             device_list) {
9732                 fprintf(stderr,
9733                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9734                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9735                 if (!ret)
9736                         ret = 1;
9737         }
9738         return ret;
9739 }
9740
9741 static int add_root_item_to_list(struct list_head *head,
9742                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9743                                   u8 level, u8 drop_level,
9744                                   int level_size, struct btrfs_key *drop_key)
9745 {
9746
9747         struct root_item_record *ri_rec;
9748         ri_rec = malloc(sizeof(*ri_rec));
9749         if (!ri_rec)
9750                 return -ENOMEM;
9751         ri_rec->bytenr = bytenr;
9752         ri_rec->objectid = objectid;
9753         ri_rec->level = level;
9754         ri_rec->level_size = level_size;
9755         ri_rec->drop_level = drop_level;
9756         ri_rec->last_snapshot = last_snapshot;
9757         if (drop_key)
9758                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9759         list_add_tail(&ri_rec->list, head);
9760
9761         return 0;
9762 }
9763
9764 static void free_root_item_list(struct list_head *list)
9765 {
9766         struct root_item_record *ri_rec;
9767
9768         while (!list_empty(list)) {
9769                 ri_rec = list_first_entry(list, struct root_item_record,
9770                                           list);
9771                 list_del_init(&ri_rec->list);
9772                 free(ri_rec);
9773         }
9774 }
9775
9776 static int deal_root_from_list(struct list_head *list,
9777                                struct btrfs_root *root,
9778                                struct block_info *bits,
9779                                int bits_nr,
9780                                struct cache_tree *pending,
9781                                struct cache_tree *seen,
9782                                struct cache_tree *reada,
9783                                struct cache_tree *nodes,
9784                                struct cache_tree *extent_cache,
9785                                struct cache_tree *chunk_cache,
9786                                struct rb_root *dev_cache,
9787                                struct block_group_tree *block_group_cache,
9788                                struct device_extent_tree *dev_extent_cache)
9789 {
9790         int ret = 0;
9791         u64 last;
9792
9793         while (!list_empty(list)) {
9794                 struct root_item_record *rec;
9795                 struct extent_buffer *buf;
9796                 rec = list_entry(list->next,
9797                                  struct root_item_record, list);
9798                 last = 0;
9799                 buf = read_tree_block(root->fs_info,
9800                                       rec->bytenr, rec->level_size, 0);
9801                 if (!extent_buffer_uptodate(buf)) {
9802                         free_extent_buffer(buf);
9803                         ret = -EIO;
9804                         break;
9805                 }
9806                 ret = add_root_to_pending(buf, extent_cache, pending,
9807                                     seen, nodes, rec->objectid);
9808                 if (ret < 0)
9809                         break;
9810                 /*
9811                  * To rebuild extent tree, we need deal with snapshot
9812                  * one by one, otherwise we deal with node firstly which
9813                  * can maximize readahead.
9814                  */
9815                 while (1) {
9816                         ret = run_next_block(root, bits, bits_nr, &last,
9817                                              pending, seen, reada, nodes,
9818                                              extent_cache, chunk_cache,
9819                                              dev_cache, block_group_cache,
9820                                              dev_extent_cache, rec);
9821                         if (ret != 0)
9822                                 break;
9823                 }
9824                 free_extent_buffer(buf);
9825                 list_del(&rec->list);
9826                 free(rec);
9827                 if (ret < 0)
9828                         break;
9829         }
9830         while (ret >= 0) {
9831                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9832                                      reada, nodes, extent_cache, chunk_cache,
9833                                      dev_cache, block_group_cache,
9834                                      dev_extent_cache, NULL);
9835                 if (ret != 0) {
9836                         if (ret > 0)
9837                                 ret = 0;
9838                         break;
9839                 }
9840         }
9841         return ret;
9842 }
9843
9844 static int check_chunks_and_extents(struct btrfs_root *root)
9845 {
9846         struct rb_root dev_cache;
9847         struct cache_tree chunk_cache;
9848         struct block_group_tree block_group_cache;
9849         struct device_extent_tree dev_extent_cache;
9850         struct cache_tree extent_cache;
9851         struct cache_tree seen;
9852         struct cache_tree pending;
9853         struct cache_tree reada;
9854         struct cache_tree nodes;
9855         struct extent_io_tree excluded_extents;
9856         struct cache_tree corrupt_blocks;
9857         struct btrfs_path path;
9858         struct btrfs_key key;
9859         struct btrfs_key found_key;
9860         int ret, err = 0;
9861         struct block_info *bits;
9862         int bits_nr;
9863         struct extent_buffer *leaf;
9864         int slot;
9865         struct btrfs_root_item ri;
9866         struct list_head dropping_trees;
9867         struct list_head normal_trees;
9868         struct btrfs_root *root1;
9869         u64 objectid;
9870         u32 level_size;
9871         u8 level;
9872
9873         dev_cache = RB_ROOT;
9874         cache_tree_init(&chunk_cache);
9875         block_group_tree_init(&block_group_cache);
9876         device_extent_tree_init(&dev_extent_cache);
9877
9878         cache_tree_init(&extent_cache);
9879         cache_tree_init(&seen);
9880         cache_tree_init(&pending);
9881         cache_tree_init(&nodes);
9882         cache_tree_init(&reada);
9883         cache_tree_init(&corrupt_blocks);
9884         extent_io_tree_init(&excluded_extents);
9885         INIT_LIST_HEAD(&dropping_trees);
9886         INIT_LIST_HEAD(&normal_trees);
9887
9888         if (repair) {
9889                 root->fs_info->excluded_extents = &excluded_extents;
9890                 root->fs_info->fsck_extent_cache = &extent_cache;
9891                 root->fs_info->free_extent_hook = free_extent_hook;
9892                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9893         }
9894
9895         bits_nr = 1024;
9896         bits = malloc(bits_nr * sizeof(struct block_info));
9897         if (!bits) {
9898                 perror("malloc");
9899                 exit(1);
9900         }
9901
9902         if (ctx.progress_enabled) {
9903                 ctx.tp = TASK_EXTENTS;
9904                 task_start(ctx.info);
9905         }
9906
9907 again:
9908         root1 = root->fs_info->tree_root;
9909         level = btrfs_header_level(root1->node);
9910         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9911                                     root1->node->start, 0, level, 0,
9912                                     root1->fs_info->nodesize, NULL);
9913         if (ret < 0)
9914                 goto out;
9915         root1 = root->fs_info->chunk_root;
9916         level = btrfs_header_level(root1->node);
9917         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9918                                     root1->node->start, 0, level, 0,
9919                                     root1->fs_info->nodesize, NULL);
9920         if (ret < 0)
9921                 goto out;
9922         btrfs_init_path(&path);
9923         key.offset = 0;
9924         key.objectid = 0;
9925         key.type = BTRFS_ROOT_ITEM_KEY;
9926         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9927                                         &key, &path, 0, 0);
9928         if (ret < 0)
9929                 goto out;
9930         while(1) {
9931                 leaf = path.nodes[0];
9932                 slot = path.slots[0];
9933                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9934                         ret = btrfs_next_leaf(root, &path);
9935                         if (ret != 0)
9936                                 break;
9937                         leaf = path.nodes[0];
9938                         slot = path.slots[0];
9939                 }
9940                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9941                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9942                         unsigned long offset;
9943                         u64 last_snapshot;
9944
9945                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9946                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9947                         last_snapshot = btrfs_root_last_snapshot(&ri);
9948                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9949                                 level = btrfs_root_level(&ri);
9950                                 level_size = root->fs_info->nodesize;
9951                                 ret = add_root_item_to_list(&normal_trees,
9952                                                 found_key.objectid,
9953                                                 btrfs_root_bytenr(&ri),
9954                                                 last_snapshot, level,
9955                                                 0, level_size, NULL);
9956                                 if (ret < 0)
9957                                         goto out;
9958                         } else {
9959                                 level = btrfs_root_level(&ri);
9960                                 level_size = root->fs_info->nodesize;
9961                                 objectid = found_key.objectid;
9962                                 btrfs_disk_key_to_cpu(&found_key,
9963                                                       &ri.drop_progress);
9964                                 ret = add_root_item_to_list(&dropping_trees,
9965                                                 objectid,
9966                                                 btrfs_root_bytenr(&ri),
9967                                                 last_snapshot, level,
9968                                                 ri.drop_level,
9969                                                 level_size, &found_key);
9970                                 if (ret < 0)
9971                                         goto out;
9972                         }
9973                 }
9974                 path.slots[0]++;
9975         }
9976         btrfs_release_path(&path);
9977
9978         /*
9979          * check_block can return -EAGAIN if it fixes something, please keep
9980          * this in mind when dealing with return values from these functions, if
9981          * we get -EAGAIN we want to fall through and restart the loop.
9982          */
9983         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9984                                   &seen, &reada, &nodes, &extent_cache,
9985                                   &chunk_cache, &dev_cache, &block_group_cache,
9986                                   &dev_extent_cache);
9987         if (ret < 0) {
9988                 if (ret == -EAGAIN)
9989                         goto loop;
9990                 goto out;
9991         }
9992         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9993                                   &pending, &seen, &reada, &nodes,
9994                                   &extent_cache, &chunk_cache, &dev_cache,
9995                                   &block_group_cache, &dev_extent_cache);
9996         if (ret < 0) {
9997                 if (ret == -EAGAIN)
9998                         goto loop;
9999                 goto out;
10000         }
10001
10002         ret = check_chunks(&chunk_cache, &block_group_cache,
10003                            &dev_extent_cache, NULL, NULL, NULL, 0);
10004         if (ret) {
10005                 if (ret == -EAGAIN)
10006                         goto loop;
10007                 err = ret;
10008         }
10009
10010         ret = check_extent_refs(root, &extent_cache);
10011         if (ret < 0) {
10012                 if (ret == -EAGAIN)
10013                         goto loop;
10014                 goto out;
10015         }
10016
10017         ret = check_devices(&dev_cache, &dev_extent_cache);
10018         if (ret && err)
10019                 ret = err;
10020
10021 out:
10022         task_stop(ctx.info);
10023         if (repair) {
10024                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10025                 extent_io_tree_cleanup(&excluded_extents);
10026                 root->fs_info->fsck_extent_cache = NULL;
10027                 root->fs_info->free_extent_hook = NULL;
10028                 root->fs_info->corrupt_blocks = NULL;
10029                 root->fs_info->excluded_extents = NULL;
10030         }
10031         free(bits);
10032         free_chunk_cache_tree(&chunk_cache);
10033         free_device_cache_tree(&dev_cache);
10034         free_block_group_tree(&block_group_cache);
10035         free_device_extent_tree(&dev_extent_cache);
10036         free_extent_cache_tree(&seen);
10037         free_extent_cache_tree(&pending);
10038         free_extent_cache_tree(&reada);
10039         free_extent_cache_tree(&nodes);
10040         free_root_item_list(&normal_trees);
10041         free_root_item_list(&dropping_trees);
10042         return ret;
10043 loop:
10044         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10045         free_extent_cache_tree(&seen);
10046         free_extent_cache_tree(&pending);
10047         free_extent_cache_tree(&reada);
10048         free_extent_cache_tree(&nodes);
10049         free_chunk_cache_tree(&chunk_cache);
10050         free_block_group_tree(&block_group_cache);
10051         free_device_cache_tree(&dev_cache);
10052         free_device_extent_tree(&dev_extent_cache);
10053         free_extent_record_cache(&extent_cache);
10054         free_root_item_list(&normal_trees);
10055         free_root_item_list(&dropping_trees);
10056         extent_io_tree_cleanup(&excluded_extents);
10057         goto again;
10058 }
10059
10060 /*
10061  * Check backrefs of a tree block given by @bytenr or @eb.
10062  *
10063  * @root:       the root containing the @bytenr or @eb
10064  * @eb:         tree block extent buffer, can be NULL
10065  * @bytenr:     bytenr of the tree block to search
10066  * @level:      tree level of the tree block
10067  * @owner:      owner of the tree block
10068  *
10069  * Return >0 for any error found and output error message
10070  * Return 0 for no error found
10071  */
10072 static int check_tree_block_ref(struct btrfs_root *root,
10073                                 struct extent_buffer *eb, u64 bytenr,
10074                                 int level, u64 owner)
10075 {
10076         struct btrfs_key key;
10077         struct btrfs_root *extent_root = root->fs_info->extent_root;
10078         struct btrfs_path path;
10079         struct btrfs_extent_item *ei;
10080         struct btrfs_extent_inline_ref *iref;
10081         struct extent_buffer *leaf;
10082         unsigned long end;
10083         unsigned long ptr;
10084         int slot;
10085         int skinny_level;
10086         int type;
10087         u32 nodesize = root->fs_info->nodesize;
10088         u32 item_size;
10089         u64 offset;
10090         int tree_reloc_root = 0;
10091         int found_ref = 0;
10092         int err = 0;
10093         int ret;
10094
10095         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10096             btrfs_header_bytenr(root->node) == bytenr)
10097                 tree_reloc_root = 1;
10098
10099         btrfs_init_path(&path);
10100         key.objectid = bytenr;
10101         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10102                 key.type = BTRFS_METADATA_ITEM_KEY;
10103         else
10104                 key.type = BTRFS_EXTENT_ITEM_KEY;
10105         key.offset = (u64)-1;
10106
10107         /* Search for the backref in extent tree */
10108         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10109         if (ret < 0) {
10110                 err |= BACKREF_MISSING;
10111                 goto out;
10112         }
10113         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10114         if (ret) {
10115                 err |= BACKREF_MISSING;
10116                 goto out;
10117         }
10118
10119         leaf = path.nodes[0];
10120         slot = path.slots[0];
10121         btrfs_item_key_to_cpu(leaf, &key, slot);
10122
10123         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10124
10125         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10126                 skinny_level = (int)key.offset;
10127                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10128         } else {
10129                 struct btrfs_tree_block_info *info;
10130
10131                 info = (struct btrfs_tree_block_info *)(ei + 1);
10132                 skinny_level = btrfs_tree_block_level(leaf, info);
10133                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10134         }
10135
10136         if (eb) {
10137                 u64 header_gen;
10138                 u64 extent_gen;
10139
10140                 if (!(btrfs_extent_flags(leaf, ei) &
10141                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10142                         error(
10143                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10144                                 key.objectid, nodesize,
10145                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10146                         err = BACKREF_MISMATCH;
10147                 }
10148                 header_gen = btrfs_header_generation(eb);
10149                 extent_gen = btrfs_extent_generation(leaf, ei);
10150                 if (header_gen != extent_gen) {
10151                         error(
10152         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10153                                 key.objectid, nodesize, header_gen,
10154                                 extent_gen);
10155                         err = BACKREF_MISMATCH;
10156                 }
10157                 if (level != skinny_level) {
10158                         error(
10159                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10160                                 key.objectid, nodesize, level, skinny_level);
10161                         err = BACKREF_MISMATCH;
10162                 }
10163                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10164                         error(
10165                         "extent[%llu %u] is referred by other roots than %llu",
10166                                 key.objectid, nodesize, root->objectid);
10167                         err = BACKREF_MISMATCH;
10168                 }
10169         }
10170
10171         /*
10172          * Iterate the extent/metadata item to find the exact backref
10173          */
10174         item_size = btrfs_item_size_nr(leaf, slot);
10175         ptr = (unsigned long)iref;
10176         end = (unsigned long)ei + item_size;
10177         while (ptr < end) {
10178                 iref = (struct btrfs_extent_inline_ref *)ptr;
10179                 type = btrfs_extent_inline_ref_type(leaf, iref);
10180                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10181
10182                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10183                         (offset == root->objectid || offset == owner)) {
10184                         found_ref = 1;
10185                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10186                         /*
10187                          * Backref of tree reloc root points to itself, no need
10188                          * to check backref any more.
10189                          */
10190                         if (tree_reloc_root)
10191                                 found_ref = 1;
10192                         else
10193                         /* Check if the backref points to valid referencer */
10194                                 found_ref = !check_tree_block_ref(root, NULL,
10195                                                 offset, level + 1, owner);
10196                 }
10197
10198                 if (found_ref)
10199                         break;
10200                 ptr += btrfs_extent_inline_ref_size(type);
10201         }
10202
10203         /*
10204          * Inlined extent item doesn't have what we need, check
10205          * TREE_BLOCK_REF_KEY
10206          */
10207         if (!found_ref) {
10208                 btrfs_release_path(&path);
10209                 key.objectid = bytenr;
10210                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10211                 key.offset = root->objectid;
10212
10213                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10214                 if (!ret)
10215                         found_ref = 1;
10216         }
10217         if (!found_ref)
10218                 err |= BACKREF_MISSING;
10219 out:
10220         btrfs_release_path(&path);
10221         if (eb && (err & BACKREF_MISSING))
10222                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10223                         bytenr, nodesize, owner, level);
10224         return err;
10225 }
10226
10227 /*
10228  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10229  *
10230  * Return >0 any error found and output error message
10231  * Return 0 for no error found
10232  */
10233 static int check_extent_data_item(struct btrfs_root *root,
10234                                   struct extent_buffer *eb, int slot)
10235 {
10236         struct btrfs_file_extent_item *fi;
10237         struct btrfs_path path;
10238         struct btrfs_root *extent_root = root->fs_info->extent_root;
10239         struct btrfs_key fi_key;
10240         struct btrfs_key dbref_key;
10241         struct extent_buffer *leaf;
10242         struct btrfs_extent_item *ei;
10243         struct btrfs_extent_inline_ref *iref;
10244         struct btrfs_extent_data_ref *dref;
10245         u64 owner;
10246         u64 disk_bytenr;
10247         u64 disk_num_bytes;
10248         u64 extent_num_bytes;
10249         u64 extent_flags;
10250         u32 item_size;
10251         unsigned long end;
10252         unsigned long ptr;
10253         int type;
10254         u64 ref_root;
10255         int found_dbackref = 0;
10256         int err = 0;
10257         int ret;
10258
10259         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10260         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10261
10262         /* Nothing to check for hole and inline data extents */
10263         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10264             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10265                 return 0;
10266
10267         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10268         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10269         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10270
10271         /* Check unaligned disk_num_bytes and num_bytes */
10272         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10273                 error(
10274 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10275                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10276                         root->fs_info->sectorsize);
10277                 err |= BYTES_UNALIGNED;
10278         } else {
10279                 data_bytes_allocated += disk_num_bytes;
10280         }
10281         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10282                 error(
10283 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10284                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10285                         root->fs_info->sectorsize);
10286                 err |= BYTES_UNALIGNED;
10287         } else {
10288                 data_bytes_referenced += extent_num_bytes;
10289         }
10290         owner = btrfs_header_owner(eb);
10291
10292         /* Check the extent item of the file extent in extent tree */
10293         btrfs_init_path(&path);
10294         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10295         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10296         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10297
10298         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10299         if (ret)
10300                 goto out;
10301
10302         leaf = path.nodes[0];
10303         slot = path.slots[0];
10304         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10305
10306         extent_flags = btrfs_extent_flags(leaf, ei);
10307
10308         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10309                 error(
10310                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10311                     disk_bytenr, disk_num_bytes,
10312                     BTRFS_EXTENT_FLAG_DATA);
10313                 err |= BACKREF_MISMATCH;
10314         }
10315
10316         /* Check data backref inside that extent item */
10317         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10318         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10319         ptr = (unsigned long)iref;
10320         end = (unsigned long)ei + item_size;
10321         while (ptr < end) {
10322                 iref = (struct btrfs_extent_inline_ref *)ptr;
10323                 type = btrfs_extent_inline_ref_type(leaf, iref);
10324                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10325
10326                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10327                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10328                         if (ref_root == owner || ref_root == root->objectid)
10329                                 found_dbackref = 1;
10330                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10331                         found_dbackref = !check_tree_block_ref(root, NULL,
10332                                 btrfs_extent_inline_ref_offset(leaf, iref),
10333                                 0, owner);
10334                 }
10335
10336                 if (found_dbackref)
10337                         break;
10338                 ptr += btrfs_extent_inline_ref_size(type);
10339         }
10340
10341         if (!found_dbackref) {
10342                 btrfs_release_path(&path);
10343
10344                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10345                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10346                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10347                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10348                                 fi_key.objectid, fi_key.offset);
10349
10350                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10351                                         &dbref_key, &path, 0, 0);
10352                 if (!ret) {
10353                         found_dbackref = 1;
10354                         goto out;
10355                 }
10356
10357                 btrfs_release_path(&path);
10358
10359                 /*
10360                  * Neither inlined nor EXTENT_DATA_REF found, try
10361                  * SHARED_DATA_REF as last chance.
10362                  */
10363                 dbref_key.objectid = disk_bytenr;
10364                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10365                 dbref_key.offset = eb->start;
10366
10367                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10368                                         &dbref_key, &path, 0, 0);
10369                 if (!ret) {
10370                         found_dbackref = 1;
10371                         goto out;
10372                 }
10373         }
10374
10375 out:
10376         if (!found_dbackref)
10377                 err |= BACKREF_MISSING;
10378         btrfs_release_path(&path);
10379         if (err & BACKREF_MISSING) {
10380                 error("data extent[%llu %llu] backref lost",
10381                       disk_bytenr, disk_num_bytes);
10382         }
10383         return err;
10384 }
10385
10386 /*
10387  * Get real tree block level for the case like shared block
10388  * Return >= 0 as tree level
10389  * Return <0 for error
10390  */
10391 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10392 {
10393         struct extent_buffer *eb;
10394         struct btrfs_path path;
10395         struct btrfs_key key;
10396         struct btrfs_extent_item *ei;
10397         u64 flags;
10398         u64 transid;
10399         u8 backref_level;
10400         u8 header_level;
10401         int ret;
10402
10403         /* Search extent tree for extent generation and level */
10404         key.objectid = bytenr;
10405         key.type = BTRFS_METADATA_ITEM_KEY;
10406         key.offset = (u64)-1;
10407
10408         btrfs_init_path(&path);
10409         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10410         if (ret < 0)
10411                 goto release_out;
10412         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10413         if (ret < 0)
10414                 goto release_out;
10415         if (ret > 0) {
10416                 ret = -ENOENT;
10417                 goto release_out;
10418         }
10419
10420         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10421         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10422                             struct btrfs_extent_item);
10423         flags = btrfs_extent_flags(path.nodes[0], ei);
10424         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10425                 ret = -ENOENT;
10426                 goto release_out;
10427         }
10428
10429         /* Get transid for later read_tree_block() check */
10430         transid = btrfs_extent_generation(path.nodes[0], ei);
10431
10432         /* Get backref level as one source */
10433         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10434                 backref_level = key.offset;
10435         } else {
10436                 struct btrfs_tree_block_info *info;
10437
10438                 info = (struct btrfs_tree_block_info *)(ei + 1);
10439                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10440         }
10441         btrfs_release_path(&path);
10442
10443         /* Get level from tree block as an alternative source */
10444         eb = read_tree_block(fs_info, bytenr, fs_info->nodesize, transid);
10445         if (!extent_buffer_uptodate(eb)) {
10446                 free_extent_buffer(eb);
10447                 return -EIO;
10448         }
10449         header_level = btrfs_header_level(eb);
10450         free_extent_buffer(eb);
10451
10452         if (header_level != backref_level)
10453                 return -EIO;
10454         return header_level;
10455
10456 release_out:
10457         btrfs_release_path(&path);
10458         return ret;
10459 }
10460
10461 /*
10462  * Check if a tree block backref is valid (points to a valid tree block)
10463  * if level == -1, level will be resolved
10464  * Return >0 for any error found and print error message
10465  */
10466 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10467                                     u64 bytenr, int level)
10468 {
10469         struct btrfs_root *root;
10470         struct btrfs_key key;
10471         struct btrfs_path path;
10472         struct extent_buffer *eb;
10473         struct extent_buffer *node;
10474         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10475         int err = 0;
10476         int ret;
10477
10478         /* Query level for level == -1 special case */
10479         if (level == -1)
10480                 level = query_tree_block_level(fs_info, bytenr);
10481         if (level < 0) {
10482                 err |= REFERENCER_MISSING;
10483                 goto out;
10484         }
10485
10486         key.objectid = root_id;
10487         key.type = BTRFS_ROOT_ITEM_KEY;
10488         key.offset = (u64)-1;
10489
10490         root = btrfs_read_fs_root(fs_info, &key);
10491         if (IS_ERR(root)) {
10492                 err |= REFERENCER_MISSING;
10493                 goto out;
10494         }
10495
10496         /* Read out the tree block to get item/node key */
10497         eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10498         if (!extent_buffer_uptodate(eb)) {
10499                 err |= REFERENCER_MISSING;
10500                 free_extent_buffer(eb);
10501                 goto out;
10502         }
10503
10504         /* Empty tree, no need to check key */
10505         if (!btrfs_header_nritems(eb) && !level) {
10506                 free_extent_buffer(eb);
10507                 goto out;
10508         }
10509
10510         if (level)
10511                 btrfs_node_key_to_cpu(eb, &key, 0);
10512         else
10513                 btrfs_item_key_to_cpu(eb, &key, 0);
10514
10515         free_extent_buffer(eb);
10516
10517         btrfs_init_path(&path);
10518         path.lowest_level = level;
10519         /* Search with the first key, to ensure we can reach it */
10520         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10521         if (ret < 0) {
10522                 err |= REFERENCER_MISSING;
10523                 goto release_out;
10524         }
10525
10526         node = path.nodes[level];
10527         if (btrfs_header_bytenr(node) != bytenr) {
10528                 error(
10529         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10530                         bytenr, nodesize, bytenr,
10531                         btrfs_header_bytenr(node));
10532                 err |= REFERENCER_MISMATCH;
10533         }
10534         if (btrfs_header_level(node) != level) {
10535                 error(
10536         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10537                         bytenr, nodesize, level,
10538                         btrfs_header_level(node));
10539                 err |= REFERENCER_MISMATCH;
10540         }
10541
10542 release_out:
10543         btrfs_release_path(&path);
10544 out:
10545         if (err & REFERENCER_MISSING) {
10546                 if (level < 0)
10547                         error("extent [%llu %d] lost referencer (owner: %llu)",
10548                                 bytenr, nodesize, root_id);
10549                 else
10550                         error(
10551                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10552                                 bytenr, nodesize, root_id, level);
10553         }
10554
10555         return err;
10556 }
10557
10558 /*
10559  * Check if tree block @eb is tree reloc root.
10560  * Return 0 if it's not or any problem happens
10561  * Return 1 if it's a tree reloc root
10562  */
10563 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10564                                  struct extent_buffer *eb)
10565 {
10566         struct btrfs_root *tree_reloc_root;
10567         struct btrfs_key key;
10568         u64 bytenr = btrfs_header_bytenr(eb);
10569         u64 owner = btrfs_header_owner(eb);
10570         int ret = 0;
10571
10572         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10573         key.offset = owner;
10574         key.type = BTRFS_ROOT_ITEM_KEY;
10575
10576         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10577         if (IS_ERR(tree_reloc_root))
10578                 return 0;
10579
10580         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10581                 ret = 1;
10582         btrfs_free_fs_root(tree_reloc_root);
10583         return ret;
10584 }
10585
10586 /*
10587  * Check referencer for shared block backref
10588  * If level == -1, this function will resolve the level.
10589  */
10590 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10591                                      u64 parent, u64 bytenr, int level)
10592 {
10593         struct extent_buffer *eb;
10594         u32 nr;
10595         int found_parent = 0;
10596         int i;
10597
10598         eb = read_tree_block(fs_info, parent, fs_info->nodesize, 0);
10599         if (!extent_buffer_uptodate(eb))
10600                 goto out;
10601
10602         if (level == -1)
10603                 level = query_tree_block_level(fs_info, bytenr);
10604         if (level < 0)
10605                 goto out;
10606
10607         /* It's possible it's a tree reloc root */
10608         if (parent == bytenr) {
10609                 if (is_tree_reloc_root(fs_info, eb))
10610                         found_parent = 1;
10611                 goto out;
10612         }
10613
10614         if (level + 1 != btrfs_header_level(eb))
10615                 goto out;
10616
10617         nr = btrfs_header_nritems(eb);
10618         for (i = 0; i < nr; i++) {
10619                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10620                         found_parent = 1;
10621                         break;
10622                 }
10623         }
10624 out:
10625         free_extent_buffer(eb);
10626         if (!found_parent) {
10627                 error(
10628         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10629                         bytenr, fs_info->nodesize, parent, level);
10630                 return REFERENCER_MISSING;
10631         }
10632         return 0;
10633 }
10634
10635 /*
10636  * Check referencer for normal (inlined) data ref
10637  * If len == 0, it will be resolved by searching in extent tree
10638  */
10639 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10640                                      u64 root_id, u64 objectid, u64 offset,
10641                                      u64 bytenr, u64 len, u32 count)
10642 {
10643         struct btrfs_root *root;
10644         struct btrfs_root *extent_root = fs_info->extent_root;
10645         struct btrfs_key key;
10646         struct btrfs_path path;
10647         struct extent_buffer *leaf;
10648         struct btrfs_file_extent_item *fi;
10649         u32 found_count = 0;
10650         int slot;
10651         int ret = 0;
10652
10653         if (!len) {
10654                 key.objectid = bytenr;
10655                 key.type = BTRFS_EXTENT_ITEM_KEY;
10656                 key.offset = (u64)-1;
10657
10658                 btrfs_init_path(&path);
10659                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10660                 if (ret < 0)
10661                         goto out;
10662                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10663                 if (ret)
10664                         goto out;
10665                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10666                 if (key.objectid != bytenr ||
10667                     key.type != BTRFS_EXTENT_ITEM_KEY)
10668                         goto out;
10669                 len = key.offset;
10670                 btrfs_release_path(&path);
10671         }
10672         key.objectid = root_id;
10673         key.type = BTRFS_ROOT_ITEM_KEY;
10674         key.offset = (u64)-1;
10675         btrfs_init_path(&path);
10676
10677         root = btrfs_read_fs_root(fs_info, &key);
10678         if (IS_ERR(root))
10679                 goto out;
10680
10681         key.objectid = objectid;
10682         key.type = BTRFS_EXTENT_DATA_KEY;
10683         /*
10684          * It can be nasty as data backref offset is
10685          * file offset - file extent offset, which is smaller or
10686          * equal to original backref offset.  The only special case is
10687          * overflow.  So we need to special check and do further search.
10688          */
10689         key.offset = offset & (1ULL << 63) ? 0 : offset;
10690
10691         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10692         if (ret < 0)
10693                 goto out;
10694
10695         /*
10696          * Search afterwards to get correct one
10697          * NOTE: As we must do a comprehensive check on the data backref to
10698          * make sure the dref count also matches, we must iterate all file
10699          * extents for that inode.
10700          */
10701         while (1) {
10702                 leaf = path.nodes[0];
10703                 slot = path.slots[0];
10704
10705                 if (slot >= btrfs_header_nritems(leaf))
10706                         goto next;
10707                 btrfs_item_key_to_cpu(leaf, &key, slot);
10708                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10709                         break;
10710                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10711                 /*
10712                  * Except normal disk bytenr and disk num bytes, we still
10713                  * need to do extra check on dbackref offset as
10714                  * dbackref offset = file_offset - file_extent_offset
10715                  */
10716                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10717                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10718                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10719                     offset)
10720                         found_count++;
10721
10722 next:
10723                 ret = btrfs_next_item(root, &path);
10724                 if (ret)
10725                         break;
10726         }
10727 out:
10728         btrfs_release_path(&path);
10729         if (found_count != count) {
10730                 error(
10731 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10732                         bytenr, len, root_id, objectid, offset, count, found_count);
10733                 return REFERENCER_MISSING;
10734         }
10735         return 0;
10736 }
10737
10738 /*
10739  * Check if the referencer of a shared data backref exists
10740  */
10741 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10742                                      u64 parent, u64 bytenr)
10743 {
10744         struct extent_buffer *eb;
10745         struct btrfs_key key;
10746         struct btrfs_file_extent_item *fi;
10747         u32 nr;
10748         int found_parent = 0;
10749         int i;
10750
10751         eb = read_tree_block(fs_info, parent, fs_info->nodesize, 0);
10752         if (!extent_buffer_uptodate(eb))
10753                 goto out;
10754
10755         nr = btrfs_header_nritems(eb);
10756         for (i = 0; i < nr; i++) {
10757                 btrfs_item_key_to_cpu(eb, &key, i);
10758                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10759                         continue;
10760
10761                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10762                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10763                         continue;
10764
10765                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10766                         found_parent = 1;
10767                         break;
10768                 }
10769         }
10770
10771 out:
10772         free_extent_buffer(eb);
10773         if (!found_parent) {
10774                 error("shared extent %llu referencer lost (parent: %llu)",
10775                         bytenr, parent);
10776                 return REFERENCER_MISSING;
10777         }
10778         return 0;
10779 }
10780
10781 /*
10782  * This function will check a given extent item, including its backref and
10783  * itself (like crossing stripe boundary and type)
10784  *
10785  * Since we don't use extent_record anymore, introduce new error bit
10786  */
10787 static int check_extent_item(struct btrfs_fs_info *fs_info,
10788                              struct extent_buffer *eb, int slot)
10789 {
10790         struct btrfs_extent_item *ei;
10791         struct btrfs_extent_inline_ref *iref;
10792         struct btrfs_extent_data_ref *dref;
10793         unsigned long end;
10794         unsigned long ptr;
10795         int type;
10796         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10797         u32 item_size = btrfs_item_size_nr(eb, slot);
10798         u64 flags;
10799         u64 offset;
10800         int metadata = 0;
10801         int level;
10802         struct btrfs_key key;
10803         int ret;
10804         int err = 0;
10805
10806         btrfs_item_key_to_cpu(eb, &key, slot);
10807         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10808                 bytes_used += key.offset;
10809         else
10810                 bytes_used += nodesize;
10811
10812         if (item_size < sizeof(*ei)) {
10813                 /*
10814                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10815                  * old thing when on disk format is still un-determined.
10816                  * No need to care about it anymore
10817                  */
10818                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10819                 return -ENOTTY;
10820         }
10821
10822         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10823         flags = btrfs_extent_flags(eb, ei);
10824
10825         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10826                 metadata = 1;
10827         if (metadata && check_crossing_stripes(global_info, key.objectid,
10828                                                eb->len)) {
10829                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10830                       key.objectid, key.objectid + nodesize);
10831                 err |= CROSSING_STRIPE_BOUNDARY;
10832         }
10833
10834         ptr = (unsigned long)(ei + 1);
10835
10836         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10837                 /* Old EXTENT_ITEM metadata */
10838                 struct btrfs_tree_block_info *info;
10839
10840                 info = (struct btrfs_tree_block_info *)ptr;
10841                 level = btrfs_tree_block_level(eb, info);
10842                 ptr += sizeof(struct btrfs_tree_block_info);
10843         } else {
10844                 /* New METADATA_ITEM */
10845                 level = key.offset;
10846         }
10847         end = (unsigned long)ei + item_size;
10848
10849 next:
10850         /* Reached extent item end normally */
10851         if (ptr == end)
10852                 goto out;
10853
10854         /* Beyond extent item end, wrong item size */
10855         if (ptr > end) {
10856                 err |= ITEM_SIZE_MISMATCH;
10857                 error("extent item at bytenr %llu slot %d has wrong size",
10858                         eb->start, slot);
10859                 goto out;
10860         }
10861
10862         /* Now check every backref in this extent item */
10863         iref = (struct btrfs_extent_inline_ref *)ptr;
10864         type = btrfs_extent_inline_ref_type(eb, iref);
10865         offset = btrfs_extent_inline_ref_offset(eb, iref);
10866         switch (type) {
10867         case BTRFS_TREE_BLOCK_REF_KEY:
10868                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10869                                                level);
10870                 err |= ret;
10871                 break;
10872         case BTRFS_SHARED_BLOCK_REF_KEY:
10873                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10874                                                  level);
10875                 err |= ret;
10876                 break;
10877         case BTRFS_EXTENT_DATA_REF_KEY:
10878                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10879                 ret = check_extent_data_backref(fs_info,
10880                                 btrfs_extent_data_ref_root(eb, dref),
10881                                 btrfs_extent_data_ref_objectid(eb, dref),
10882                                 btrfs_extent_data_ref_offset(eb, dref),
10883                                 key.objectid, key.offset,
10884                                 btrfs_extent_data_ref_count(eb, dref));
10885                 err |= ret;
10886                 break;
10887         case BTRFS_SHARED_DATA_REF_KEY:
10888                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10889                 err |= ret;
10890                 break;
10891         default:
10892                 error("extent[%llu %d %llu] has unknown ref type: %d",
10893                         key.objectid, key.type, key.offset, type);
10894                 err |= UNKNOWN_TYPE;
10895                 goto out;
10896         }
10897
10898         ptr += btrfs_extent_inline_ref_size(type);
10899         goto next;
10900
10901 out:
10902         return err;
10903 }
10904
10905 /*
10906  * Check if a dev extent item is referred correctly by its chunk
10907  */
10908 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10909                                  struct extent_buffer *eb, int slot)
10910 {
10911         struct btrfs_root *chunk_root = fs_info->chunk_root;
10912         struct btrfs_dev_extent *ptr;
10913         struct btrfs_path path;
10914         struct btrfs_key chunk_key;
10915         struct btrfs_key devext_key;
10916         struct btrfs_chunk *chunk;
10917         struct extent_buffer *l;
10918         int num_stripes;
10919         u64 length;
10920         int i;
10921         int found_chunk = 0;
10922         int ret;
10923
10924         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10925         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10926         length = btrfs_dev_extent_length(eb, ptr);
10927
10928         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10929         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10930         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10931
10932         btrfs_init_path(&path);
10933         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10934         if (ret)
10935                 goto out;
10936
10937         l = path.nodes[0];
10938         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10939         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10940                                       chunk_key.offset);
10941         if (ret < 0)
10942                 goto out;
10943
10944         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10945                 goto out;
10946
10947         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10948         for (i = 0; i < num_stripes; i++) {
10949                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10950                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10951
10952                 if (devid == devext_key.objectid &&
10953                     offset == devext_key.offset) {
10954                         found_chunk = 1;
10955                         break;
10956                 }
10957         }
10958 out:
10959         btrfs_release_path(&path);
10960         if (!found_chunk) {
10961                 error(
10962                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10963                         devext_key.objectid, devext_key.offset, length);
10964                 return REFERENCER_MISSING;
10965         }
10966         return 0;
10967 }
10968
10969 /*
10970  * Check if the used space is correct with the dev item
10971  */
10972 static int check_dev_item(struct btrfs_fs_info *fs_info,
10973                           struct extent_buffer *eb, int slot)
10974 {
10975         struct btrfs_root *dev_root = fs_info->dev_root;
10976         struct btrfs_dev_item *dev_item;
10977         struct btrfs_path path;
10978         struct btrfs_key key;
10979         struct btrfs_dev_extent *ptr;
10980         u64 dev_id;
10981         u64 used;
10982         u64 total = 0;
10983         int ret;
10984
10985         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10986         dev_id = btrfs_device_id(eb, dev_item);
10987         used = btrfs_device_bytes_used(eb, dev_item);
10988
10989         key.objectid = dev_id;
10990         key.type = BTRFS_DEV_EXTENT_KEY;
10991         key.offset = 0;
10992
10993         btrfs_init_path(&path);
10994         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10995         if (ret < 0) {
10996                 btrfs_item_key_to_cpu(eb, &key, slot);
10997                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10998                         key.objectid, key.type, key.offset);
10999                 btrfs_release_path(&path);
11000                 return REFERENCER_MISSING;
11001         }
11002
11003         /* Iterate dev_extents to calculate the used space of a device */
11004         while (1) {
11005                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11006                         goto next;
11007
11008                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11009                 if (key.objectid > dev_id)
11010                         break;
11011                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11012                         goto next;
11013
11014                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11015                                      struct btrfs_dev_extent);
11016                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11017 next:
11018                 ret = btrfs_next_item(dev_root, &path);
11019                 if (ret)
11020                         break;
11021         }
11022         btrfs_release_path(&path);
11023
11024         if (used != total) {
11025                 btrfs_item_key_to_cpu(eb, &key, slot);
11026                 error(
11027 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11028                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11029                         BTRFS_DEV_EXTENT_KEY, dev_id);
11030                 return ACCOUNTING_MISMATCH;
11031         }
11032         return 0;
11033 }
11034
11035 /*
11036  * Check a block group item with its referener (chunk) and its used space
11037  * with extent/metadata item
11038  */
11039 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11040                                   struct extent_buffer *eb, int slot)
11041 {
11042         struct btrfs_root *extent_root = fs_info->extent_root;
11043         struct btrfs_root *chunk_root = fs_info->chunk_root;
11044         struct btrfs_block_group_item *bi;
11045         struct btrfs_block_group_item bg_item;
11046         struct btrfs_path path;
11047         struct btrfs_key bg_key;
11048         struct btrfs_key chunk_key;
11049         struct btrfs_key extent_key;
11050         struct btrfs_chunk *chunk;
11051         struct extent_buffer *leaf;
11052         struct btrfs_extent_item *ei;
11053         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11054         u64 flags;
11055         u64 bg_flags;
11056         u64 used;
11057         u64 total = 0;
11058         int ret;
11059         int err = 0;
11060
11061         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11062         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11063         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11064         used = btrfs_block_group_used(&bg_item);
11065         bg_flags = btrfs_block_group_flags(&bg_item);
11066
11067         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11068         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11069         chunk_key.offset = bg_key.objectid;
11070
11071         btrfs_init_path(&path);
11072         /* Search for the referencer chunk */
11073         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11074         if (ret) {
11075                 error(
11076                 "block group[%llu %llu] did not find the related chunk item",
11077                         bg_key.objectid, bg_key.offset);
11078                 err |= REFERENCER_MISSING;
11079         } else {
11080                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11081                                         struct btrfs_chunk);
11082                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11083                                                 bg_key.offset) {
11084                         error(
11085         "block group[%llu %llu] related chunk item length does not match",
11086                                 bg_key.objectid, bg_key.offset);
11087                         err |= REFERENCER_MISMATCH;
11088                 }
11089         }
11090         btrfs_release_path(&path);
11091
11092         /* Search from the block group bytenr */
11093         extent_key.objectid = bg_key.objectid;
11094         extent_key.type = 0;
11095         extent_key.offset = 0;
11096
11097         btrfs_init_path(&path);
11098         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11099         if (ret < 0)
11100                 goto out;
11101
11102         /* Iterate extent tree to account used space */
11103         while (1) {
11104                 leaf = path.nodes[0];
11105
11106                 /* Search slot can point to the last item beyond leaf nritems */
11107                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11108                         goto next;
11109
11110                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11111                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11112                         break;
11113
11114                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11115                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11116                         goto next;
11117                 if (extent_key.objectid < bg_key.objectid)
11118                         goto next;
11119
11120                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11121                         total += nodesize;
11122                 else
11123                         total += extent_key.offset;
11124
11125                 ei = btrfs_item_ptr(leaf, path.slots[0],
11126                                     struct btrfs_extent_item);
11127                 flags = btrfs_extent_flags(leaf, ei);
11128                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11129                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11130                                 error(
11131                         "bad extent[%llu, %llu) type mismatch with chunk",
11132                                         extent_key.objectid,
11133                                         extent_key.objectid + extent_key.offset);
11134                                 err |= CHUNK_TYPE_MISMATCH;
11135                         }
11136                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11137                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11138                                     BTRFS_BLOCK_GROUP_METADATA))) {
11139                                 error(
11140                         "bad extent[%llu, %llu) type mismatch with chunk",
11141                                         extent_key.objectid,
11142                                         extent_key.objectid + nodesize);
11143                                 err |= CHUNK_TYPE_MISMATCH;
11144                         }
11145                 }
11146 next:
11147                 ret = btrfs_next_item(extent_root, &path);
11148                 if (ret)
11149                         break;
11150         }
11151
11152 out:
11153         btrfs_release_path(&path);
11154
11155         if (total != used) {
11156                 error(
11157                 "block group[%llu %llu] used %llu but extent items used %llu",
11158                         bg_key.objectid, bg_key.offset, used, total);
11159                 err |= ACCOUNTING_MISMATCH;
11160         }
11161         return err;
11162 }
11163
11164 /*
11165  * Check a chunk item.
11166  * Including checking all referred dev_extents and block group
11167  */
11168 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11169                             struct extent_buffer *eb, int slot)
11170 {
11171         struct btrfs_root *extent_root = fs_info->extent_root;
11172         struct btrfs_root *dev_root = fs_info->dev_root;
11173         struct btrfs_path path;
11174         struct btrfs_key chunk_key;
11175         struct btrfs_key bg_key;
11176         struct btrfs_key devext_key;
11177         struct btrfs_chunk *chunk;
11178         struct extent_buffer *leaf;
11179         struct btrfs_block_group_item *bi;
11180         struct btrfs_block_group_item bg_item;
11181         struct btrfs_dev_extent *ptr;
11182         u64 length;
11183         u64 chunk_end;
11184         u64 stripe_len;
11185         u64 type;
11186         int num_stripes;
11187         u64 offset;
11188         u64 objectid;
11189         int i;
11190         int ret;
11191         int err = 0;
11192
11193         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11194         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11195         length = btrfs_chunk_length(eb, chunk);
11196         chunk_end = chunk_key.offset + length;
11197         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11198                                       chunk_key.offset);
11199         if (ret < 0) {
11200                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11201                         chunk_end);
11202                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11203                 goto out;
11204         }
11205         type = btrfs_chunk_type(eb, chunk);
11206
11207         bg_key.objectid = chunk_key.offset;
11208         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11209         bg_key.offset = length;
11210
11211         btrfs_init_path(&path);
11212         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11213         if (ret) {
11214                 error(
11215                 "chunk[%llu %llu) did not find the related block group item",
11216                         chunk_key.offset, chunk_end);
11217                 err |= REFERENCER_MISSING;
11218         } else{
11219                 leaf = path.nodes[0];
11220                 bi = btrfs_item_ptr(leaf, path.slots[0],
11221                                     struct btrfs_block_group_item);
11222                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11223                                    sizeof(bg_item));
11224                 if (btrfs_block_group_flags(&bg_item) != type) {
11225                         error(
11226 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11227                                 chunk_key.offset, chunk_end, type,
11228                                 btrfs_block_group_flags(&bg_item));
11229                         err |= REFERENCER_MISSING;
11230                 }
11231         }
11232
11233         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11234         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11235         for (i = 0; i < num_stripes; i++) {
11236                 btrfs_release_path(&path);
11237                 btrfs_init_path(&path);
11238                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11239                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11240                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11241
11242                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11243                                         0, 0);
11244                 if (ret)
11245                         goto not_match_dev;
11246
11247                 leaf = path.nodes[0];
11248                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11249                                      struct btrfs_dev_extent);
11250                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11251                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11252                 if (objectid != chunk_key.objectid ||
11253                     offset != chunk_key.offset ||
11254                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11255                         goto not_match_dev;
11256                 continue;
11257 not_match_dev:
11258                 err |= BACKREF_MISSING;
11259                 error(
11260                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11261                         chunk_key.objectid, chunk_end, i);
11262                 continue;
11263         }
11264         btrfs_release_path(&path);
11265 out:
11266         return err;
11267 }
11268
11269 /*
11270  * Main entry function to check known items and update related accounting info
11271  */
11272 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11273 {
11274         struct btrfs_fs_info *fs_info = root->fs_info;
11275         struct btrfs_key key;
11276         int slot = 0;
11277         int type;
11278         struct btrfs_extent_data_ref *dref;
11279         int ret;
11280         int err = 0;
11281
11282 next:
11283         btrfs_item_key_to_cpu(eb, &key, slot);
11284         type = key.type;
11285
11286         switch (type) {
11287         case BTRFS_EXTENT_DATA_KEY:
11288                 ret = check_extent_data_item(root, eb, slot);
11289                 err |= ret;
11290                 break;
11291         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11292                 ret = check_block_group_item(fs_info, eb, slot);
11293                 err |= ret;
11294                 break;
11295         case BTRFS_DEV_ITEM_KEY:
11296                 ret = check_dev_item(fs_info, eb, slot);
11297                 err |= ret;
11298                 break;
11299         case BTRFS_CHUNK_ITEM_KEY:
11300                 ret = check_chunk_item(fs_info, eb, slot);
11301                 err |= ret;
11302                 break;
11303         case BTRFS_DEV_EXTENT_KEY:
11304                 ret = check_dev_extent_item(fs_info, eb, slot);
11305                 err |= ret;
11306                 break;
11307         case BTRFS_EXTENT_ITEM_KEY:
11308         case BTRFS_METADATA_ITEM_KEY:
11309                 ret = check_extent_item(fs_info, eb, slot);
11310                 err |= ret;
11311                 break;
11312         case BTRFS_EXTENT_CSUM_KEY:
11313                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11314                 break;
11315         case BTRFS_TREE_BLOCK_REF_KEY:
11316                 ret = check_tree_block_backref(fs_info, key.offset,
11317                                                key.objectid, -1);
11318                 err |= ret;
11319                 break;
11320         case BTRFS_EXTENT_DATA_REF_KEY:
11321                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11322                 ret = check_extent_data_backref(fs_info,
11323                                 btrfs_extent_data_ref_root(eb, dref),
11324                                 btrfs_extent_data_ref_objectid(eb, dref),
11325                                 btrfs_extent_data_ref_offset(eb, dref),
11326                                 key.objectid, 0,
11327                                 btrfs_extent_data_ref_count(eb, dref));
11328                 err |= ret;
11329                 break;
11330         case BTRFS_SHARED_BLOCK_REF_KEY:
11331                 ret = check_shared_block_backref(fs_info, key.offset,
11332                                                  key.objectid, -1);
11333                 err |= ret;
11334                 break;
11335         case BTRFS_SHARED_DATA_REF_KEY:
11336                 ret = check_shared_data_backref(fs_info, key.offset,
11337                                                 key.objectid);
11338                 err |= ret;
11339                 break;
11340         default:
11341                 break;
11342         }
11343
11344         if (++slot < btrfs_header_nritems(eb))
11345                 goto next;
11346
11347         return err;
11348 }
11349
11350 /*
11351  * Helper function for later fs/subvol tree check.  To determine if a tree
11352  * block should be checked.
11353  * This function will ensure only the direct referencer with lowest rootid to
11354  * check a fs/subvolume tree block.
11355  *
11356  * Backref check at extent tree would detect errors like missing subvolume
11357  * tree, so we can do aggressive check to reduce duplicated checks.
11358  */
11359 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11360 {
11361         struct btrfs_root *extent_root = root->fs_info->extent_root;
11362         struct btrfs_key key;
11363         struct btrfs_path path;
11364         struct extent_buffer *leaf;
11365         int slot;
11366         struct btrfs_extent_item *ei;
11367         unsigned long ptr;
11368         unsigned long end;
11369         int type;
11370         u32 item_size;
11371         u64 offset;
11372         struct btrfs_extent_inline_ref *iref;
11373         int ret;
11374
11375         btrfs_init_path(&path);
11376         key.objectid = btrfs_header_bytenr(eb);
11377         key.type = BTRFS_METADATA_ITEM_KEY;
11378         key.offset = (u64)-1;
11379
11380         /*
11381          * Any failure in backref resolving means we can't determine
11382          * whom the tree block belongs to.
11383          * So in that case, we need to check that tree block
11384          */
11385         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11386         if (ret < 0)
11387                 goto need_check;
11388
11389         ret = btrfs_previous_extent_item(extent_root, &path,
11390                                          btrfs_header_bytenr(eb));
11391         if (ret)
11392                 goto need_check;
11393
11394         leaf = path.nodes[0];
11395         slot = path.slots[0];
11396         btrfs_item_key_to_cpu(leaf, &key, slot);
11397         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11398
11399         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11400                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11401         } else {
11402                 struct btrfs_tree_block_info *info;
11403
11404                 info = (struct btrfs_tree_block_info *)(ei + 1);
11405                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11406         }
11407
11408         item_size = btrfs_item_size_nr(leaf, slot);
11409         ptr = (unsigned long)iref;
11410         end = (unsigned long)ei + item_size;
11411         while (ptr < end) {
11412                 iref = (struct btrfs_extent_inline_ref *)ptr;
11413                 type = btrfs_extent_inline_ref_type(leaf, iref);
11414                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11415
11416                 /*
11417                  * We only check the tree block if current root is
11418                  * the lowest referencer of it.
11419                  */
11420                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11421                     offset < root->objectid) {
11422                         btrfs_release_path(&path);
11423                         return 0;
11424                 }
11425
11426                 ptr += btrfs_extent_inline_ref_size(type);
11427         }
11428         /*
11429          * Normally we should also check keyed tree block ref, but that may be
11430          * very time consuming.  Inlined ref should already make us skip a lot
11431          * of refs now.  So skip search keyed tree block ref.
11432          */
11433
11434 need_check:
11435         btrfs_release_path(&path);
11436         return 1;
11437 }
11438
11439 /*
11440  * Traversal function for tree block. We will do:
11441  * 1) Skip shared fs/subvolume tree blocks
11442  * 2) Update related bytes accounting
11443  * 3) Pre-order traversal
11444  */
11445 static int traverse_tree_block(struct btrfs_root *root,
11446                                 struct extent_buffer *node)
11447 {
11448         struct extent_buffer *eb;
11449         struct btrfs_key key;
11450         struct btrfs_key drop_key;
11451         int level;
11452         u64 nr;
11453         int i;
11454         int err = 0;
11455         int ret;
11456
11457         /*
11458          * Skip shared fs/subvolume tree block, in that case they will
11459          * be checked by referencer with lowest rootid
11460          */
11461         if (is_fstree(root->objectid) && !should_check(root, node))
11462                 return 0;
11463
11464         /* Update bytes accounting */
11465         total_btree_bytes += node->len;
11466         if (fs_root_objectid(btrfs_header_owner(node)))
11467                 total_fs_tree_bytes += node->len;
11468         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11469                 total_extent_tree_bytes += node->len;
11470         if (!found_old_backref &&
11471             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11472             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11473             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11474                 found_old_backref = 1;
11475
11476         /* pre-order tranversal, check itself first */
11477         level = btrfs_header_level(node);
11478         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11479                                    btrfs_header_level(node),
11480                                    btrfs_header_owner(node));
11481         err |= ret;
11482         if (err)
11483                 error(
11484         "check %s failed root %llu bytenr %llu level %d, force continue check",
11485                         level ? "node":"leaf", root->objectid,
11486                         btrfs_header_bytenr(node), btrfs_header_level(node));
11487
11488         if (!level) {
11489                 btree_space_waste += btrfs_leaf_free_space(root, node);
11490                 ret = check_leaf_items(root, node);
11491                 err |= ret;
11492                 return err;
11493         }
11494
11495         nr = btrfs_header_nritems(node);
11496         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11497         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11498                 sizeof(struct btrfs_key_ptr);
11499
11500         /* Then check all its children */
11501         for (i = 0; i < nr; i++) {
11502                 u64 blocknr = btrfs_node_blockptr(node, i);
11503
11504                 btrfs_node_key_to_cpu(node, &key, i);
11505                 if (level == root->root_item.drop_level &&
11506                     is_dropped_key(&key, &drop_key))
11507                         continue;
11508
11509                 /*
11510                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11511                  * to call the function itself.
11512                  */
11513                 eb = read_tree_block(root->fs_info, blocknr,
11514                                 root->fs_info->nodesize, 0);
11515                 if (extent_buffer_uptodate(eb)) {
11516                         ret = traverse_tree_block(root, eb);
11517                         err |= ret;
11518                 }
11519                 free_extent_buffer(eb);
11520         }
11521
11522         return err;
11523 }
11524
11525 /*
11526  * Low memory usage version check_chunks_and_extents.
11527  */
11528 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11529 {
11530         struct btrfs_path path;
11531         struct btrfs_key key;
11532         struct btrfs_root *root1;
11533         struct btrfs_root *cur_root;
11534         int err = 0;
11535         int ret;
11536
11537         root1 = root->fs_info->chunk_root;
11538         ret = traverse_tree_block(root1, root1->node);
11539         err |= ret;
11540
11541         root1 = root->fs_info->tree_root;
11542         ret = traverse_tree_block(root1, root1->node);
11543         err |= ret;
11544
11545         btrfs_init_path(&path);
11546         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11547         key.offset = 0;
11548         key.type = BTRFS_ROOT_ITEM_KEY;
11549
11550         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11551         if (ret) {
11552                 error("cannot find extent treet in tree_root");
11553                 goto out;
11554         }
11555
11556         while (1) {
11557                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11558                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11559                         goto next;
11560                 key.offset = (u64)-1;
11561
11562                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11563                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11564                                         &key);
11565                 else
11566                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11567                 if (IS_ERR(cur_root) || !cur_root) {
11568                         error("failed to read tree: %lld", key.objectid);
11569                         goto next;
11570                 }
11571
11572                 ret = traverse_tree_block(cur_root, cur_root->node);
11573                 err |= ret;
11574
11575                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11576                         btrfs_free_fs_root(cur_root);
11577 next:
11578                 ret = btrfs_next_item(root1, &path);
11579                 if (ret)
11580                         goto out;
11581         }
11582
11583 out:
11584         btrfs_release_path(&path);
11585         return err;
11586 }
11587
11588 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11589                            struct btrfs_root *root, int overwrite)
11590 {
11591         struct extent_buffer *c;
11592         struct extent_buffer *old = root->node;
11593         int level;
11594         int ret;
11595         struct btrfs_disk_key disk_key = {0,0,0};
11596
11597         level = 0;
11598
11599         if (overwrite) {
11600                 c = old;
11601                 extent_buffer_get(c);
11602                 goto init;
11603         }
11604         c = btrfs_alloc_free_block(trans, root,
11605                                    root->fs_info->nodesize,
11606                                    root->root_key.objectid,
11607                                    &disk_key, level, 0, 0);
11608         if (IS_ERR(c)) {
11609                 c = old;
11610                 extent_buffer_get(c);
11611                 overwrite = 1;
11612         }
11613 init:
11614         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11615         btrfs_set_header_level(c, level);
11616         btrfs_set_header_bytenr(c, c->start);
11617         btrfs_set_header_generation(c, trans->transid);
11618         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11619         btrfs_set_header_owner(c, root->root_key.objectid);
11620
11621         write_extent_buffer(c, root->fs_info->fsid,
11622                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11623
11624         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11625                             btrfs_header_chunk_tree_uuid(c),
11626                             BTRFS_UUID_SIZE);
11627
11628         btrfs_mark_buffer_dirty(c);
11629         /*
11630          * this case can happen in the following case:
11631          *
11632          * 1.overwrite previous root.
11633          *
11634          * 2.reinit reloc data root, this is because we skip pin
11635          * down reloc data tree before which means we can allocate
11636          * same block bytenr here.
11637          */
11638         if (old->start == c->start) {
11639                 btrfs_set_root_generation(&root->root_item,
11640                                           trans->transid);
11641                 root->root_item.level = btrfs_header_level(root->node);
11642                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11643                                         &root->root_key, &root->root_item);
11644                 if (ret) {
11645                         free_extent_buffer(c);
11646                         return ret;
11647                 }
11648         }
11649         free_extent_buffer(old);
11650         root->node = c;
11651         add_root_to_dirty_list(root);
11652         return 0;
11653 }
11654
11655 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11656                                 struct extent_buffer *eb, int tree_root)
11657 {
11658         struct extent_buffer *tmp;
11659         struct btrfs_root_item *ri;
11660         struct btrfs_key key;
11661         u64 bytenr;
11662         int level = btrfs_header_level(eb);
11663         int nritems;
11664         int ret;
11665         int i;
11666
11667         /*
11668          * If we have pinned this block before, don't pin it again.
11669          * This can not only avoid forever loop with broken filesystem
11670          * but also give us some speedups.
11671          */
11672         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11673                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11674                 return 0;
11675
11676         btrfs_pin_extent(fs_info, eb->start, eb->len);
11677
11678         nritems = btrfs_header_nritems(eb);
11679         for (i = 0; i < nritems; i++) {
11680                 if (level == 0) {
11681                         btrfs_item_key_to_cpu(eb, &key, i);
11682                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11683                                 continue;
11684                         /* Skip the extent root and reloc roots */
11685                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11686                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11687                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11688                                 continue;
11689                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11690                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11691
11692                         /*
11693                          * If at any point we start needing the real root we
11694                          * will have to build a stump root for the root we are
11695                          * in, but for now this doesn't actually use the root so
11696                          * just pass in extent_root.
11697                          */
11698                         tmp = read_tree_block(fs_info, bytenr, fs_info->nodesize, 0);
11699                         if (!extent_buffer_uptodate(tmp)) {
11700                                 fprintf(stderr, "Error reading root block\n");
11701                                 return -EIO;
11702                         }
11703                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11704                         free_extent_buffer(tmp);
11705                         if (ret)
11706                                 return ret;
11707                 } else {
11708                         bytenr = btrfs_node_blockptr(eb, i);
11709
11710                         /* If we aren't the tree root don't read the block */
11711                         if (level == 1 && !tree_root) {
11712                                 btrfs_pin_extent(fs_info, bytenr,
11713                                                 fs_info->nodesize);
11714                                 continue;
11715                         }
11716
11717                         tmp = read_tree_block(fs_info, bytenr,
11718                                               fs_info->nodesize, 0);
11719                         if (!extent_buffer_uptodate(tmp)) {
11720                                 fprintf(stderr, "Error reading tree block\n");
11721                                 return -EIO;
11722                         }
11723                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11724                         free_extent_buffer(tmp);
11725                         if (ret)
11726                                 return ret;
11727                 }
11728         }
11729
11730         return 0;
11731 }
11732
11733 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11734 {
11735         int ret;
11736
11737         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11738         if (ret)
11739                 return ret;
11740
11741         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11742 }
11743
11744 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11745 {
11746         struct btrfs_block_group_cache *cache;
11747         struct btrfs_path path;
11748         struct extent_buffer *leaf;
11749         struct btrfs_chunk *chunk;
11750         struct btrfs_key key;
11751         int ret;
11752         u64 start;
11753
11754         btrfs_init_path(&path);
11755         key.objectid = 0;
11756         key.type = BTRFS_CHUNK_ITEM_KEY;
11757         key.offset = 0;
11758         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11759         if (ret < 0) {
11760                 btrfs_release_path(&path);
11761                 return ret;
11762         }
11763
11764         /*
11765          * We do this in case the block groups were screwed up and had alloc
11766          * bits that aren't actually set on the chunks.  This happens with
11767          * restored images every time and could happen in real life I guess.
11768          */
11769         fs_info->avail_data_alloc_bits = 0;
11770         fs_info->avail_metadata_alloc_bits = 0;
11771         fs_info->avail_system_alloc_bits = 0;
11772
11773         /* First we need to create the in-memory block groups */
11774         while (1) {
11775                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11776                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11777                         if (ret < 0) {
11778                                 btrfs_release_path(&path);
11779                                 return ret;
11780                         }
11781                         if (ret) {
11782                                 ret = 0;
11783                                 break;
11784                         }
11785                 }
11786                 leaf = path.nodes[0];
11787                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11788                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11789                         path.slots[0]++;
11790                         continue;
11791                 }
11792
11793                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11794                 btrfs_add_block_group(fs_info, 0,
11795                                       btrfs_chunk_type(leaf, chunk),
11796                                       key.objectid, key.offset,
11797                                       btrfs_chunk_length(leaf, chunk));
11798                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11799                                  key.offset + btrfs_chunk_length(leaf, chunk));
11800                 path.slots[0]++;
11801         }
11802         start = 0;
11803         while (1) {
11804                 cache = btrfs_lookup_first_block_group(fs_info, start);
11805                 if (!cache)
11806                         break;
11807                 cache->cached = 1;
11808                 start = cache->key.objectid + cache->key.offset;
11809         }
11810
11811         btrfs_release_path(&path);
11812         return 0;
11813 }
11814
11815 static int reset_balance(struct btrfs_trans_handle *trans,
11816                          struct btrfs_fs_info *fs_info)
11817 {
11818         struct btrfs_root *root = fs_info->tree_root;
11819         struct btrfs_path path;
11820         struct extent_buffer *leaf;
11821         struct btrfs_key key;
11822         int del_slot, del_nr = 0;
11823         int ret;
11824         int found = 0;
11825
11826         btrfs_init_path(&path);
11827         key.objectid = BTRFS_BALANCE_OBJECTID;
11828         key.type = BTRFS_BALANCE_ITEM_KEY;
11829         key.offset = 0;
11830         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11831         if (ret) {
11832                 if (ret > 0)
11833                         ret = 0;
11834                 if (!ret)
11835                         goto reinit_data_reloc;
11836                 else
11837                         goto out;
11838         }
11839
11840         ret = btrfs_del_item(trans, root, &path);
11841         if (ret)
11842                 goto out;
11843         btrfs_release_path(&path);
11844
11845         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11846         key.type = BTRFS_ROOT_ITEM_KEY;
11847         key.offset = 0;
11848         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11849         if (ret < 0)
11850                 goto out;
11851         while (1) {
11852                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11853                         if (!found)
11854                                 break;
11855
11856                         if (del_nr) {
11857                                 ret = btrfs_del_items(trans, root, &path,
11858                                                       del_slot, del_nr);
11859                                 del_nr = 0;
11860                                 if (ret)
11861                                         goto out;
11862                         }
11863                         key.offset++;
11864                         btrfs_release_path(&path);
11865
11866                         found = 0;
11867                         ret = btrfs_search_slot(trans, root, &key, &path,
11868                                                 -1, 1);
11869                         if (ret < 0)
11870                                 goto out;
11871                         continue;
11872                 }
11873                 found = 1;
11874                 leaf = path.nodes[0];
11875                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11876                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11877                         break;
11878                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11879                         path.slots[0]++;
11880                         continue;
11881                 }
11882                 if (!del_nr) {
11883                         del_slot = path.slots[0];
11884                         del_nr = 1;
11885                 } else {
11886                         del_nr++;
11887                 }
11888                 path.slots[0]++;
11889         }
11890
11891         if (del_nr) {
11892                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11893                 if (ret)
11894                         goto out;
11895         }
11896         btrfs_release_path(&path);
11897
11898 reinit_data_reloc:
11899         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11900         key.type = BTRFS_ROOT_ITEM_KEY;
11901         key.offset = (u64)-1;
11902         root = btrfs_read_fs_root(fs_info, &key);
11903         if (IS_ERR(root)) {
11904                 fprintf(stderr, "Error reading data reloc tree\n");
11905                 ret = PTR_ERR(root);
11906                 goto out;
11907         }
11908         record_root_in_trans(trans, root);
11909         ret = btrfs_fsck_reinit_root(trans, root, 0);
11910         if (ret)
11911                 goto out;
11912         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11913 out:
11914         btrfs_release_path(&path);
11915         return ret;
11916 }
11917
11918 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11919                               struct btrfs_fs_info *fs_info)
11920 {
11921         u64 start = 0;
11922         int ret;
11923
11924         /*
11925          * The only reason we don't do this is because right now we're just
11926          * walking the trees we find and pinning down their bytes, we don't look
11927          * at any of the leaves.  In order to do mixed groups we'd have to check
11928          * the leaves of any fs roots and pin down the bytes for any file
11929          * extents we find.  Not hard but why do it if we don't have to?
11930          */
11931         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11932                 fprintf(stderr, "We don't support re-initing the extent tree "
11933                         "for mixed block groups yet, please notify a btrfs "
11934                         "developer you want to do this so they can add this "
11935                         "functionality.\n");
11936                 return -EINVAL;
11937         }
11938
11939         /*
11940          * first we need to walk all of the trees except the extent tree and pin
11941          * down the bytes that are in use so we don't overwrite any existing
11942          * metadata.
11943          */
11944         ret = pin_metadata_blocks(fs_info);
11945         if (ret) {
11946                 fprintf(stderr, "error pinning down used bytes\n");
11947                 return ret;
11948         }
11949
11950         /*
11951          * Need to drop all the block groups since we're going to recreate all
11952          * of them again.
11953          */
11954         btrfs_free_block_groups(fs_info);
11955         ret = reset_block_groups(fs_info);
11956         if (ret) {
11957                 fprintf(stderr, "error resetting the block groups\n");
11958                 return ret;
11959         }
11960
11961         /* Ok we can allocate now, reinit the extent root */
11962         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11963         if (ret) {
11964                 fprintf(stderr, "extent root initialization failed\n");
11965                 /*
11966                  * When the transaction code is updated we should end the
11967                  * transaction, but for now progs only knows about commit so
11968                  * just return an error.
11969                  */
11970                 return ret;
11971         }
11972
11973         /*
11974          * Now we have all the in-memory block groups setup so we can make
11975          * allocations properly, and the metadata we care about is safe since we
11976          * pinned all of it above.
11977          */
11978         while (1) {
11979                 struct btrfs_block_group_cache *cache;
11980
11981                 cache = btrfs_lookup_first_block_group(fs_info, start);
11982                 if (!cache)
11983                         break;
11984                 start = cache->key.objectid + cache->key.offset;
11985                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11986                                         &cache->key, &cache->item,
11987                                         sizeof(cache->item));
11988                 if (ret) {
11989                         fprintf(stderr, "Error adding block group\n");
11990                         return ret;
11991                 }
11992                 btrfs_extent_post_op(trans, fs_info->extent_root);
11993         }
11994
11995         ret = reset_balance(trans, fs_info);
11996         if (ret)
11997                 fprintf(stderr, "error resetting the pending balance\n");
11998
11999         return ret;
12000 }
12001
12002 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12003 {
12004         struct btrfs_path path;
12005         struct btrfs_trans_handle *trans;
12006         struct btrfs_key key;
12007         int ret;
12008
12009         printf("Recowing metadata block %llu\n", eb->start);
12010         key.objectid = btrfs_header_owner(eb);
12011         key.type = BTRFS_ROOT_ITEM_KEY;
12012         key.offset = (u64)-1;
12013
12014         root = btrfs_read_fs_root(root->fs_info, &key);
12015         if (IS_ERR(root)) {
12016                 fprintf(stderr, "Couldn't find owner root %llu\n",
12017                         key.objectid);
12018                 return PTR_ERR(root);
12019         }
12020
12021         trans = btrfs_start_transaction(root, 1);
12022         if (IS_ERR(trans))
12023                 return PTR_ERR(trans);
12024
12025         btrfs_init_path(&path);
12026         path.lowest_level = btrfs_header_level(eb);
12027         if (path.lowest_level)
12028                 btrfs_node_key_to_cpu(eb, &key, 0);
12029         else
12030                 btrfs_item_key_to_cpu(eb, &key, 0);
12031
12032         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12033         btrfs_commit_transaction(trans, root);
12034         btrfs_release_path(&path);
12035         return ret;
12036 }
12037
12038 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12039 {
12040         struct btrfs_path path;
12041         struct btrfs_trans_handle *trans;
12042         struct btrfs_key key;
12043         int ret;
12044
12045         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12046                bad->key.type, bad->key.offset);
12047         key.objectid = bad->root_id;
12048         key.type = BTRFS_ROOT_ITEM_KEY;
12049         key.offset = (u64)-1;
12050
12051         root = btrfs_read_fs_root(root->fs_info, &key);
12052         if (IS_ERR(root)) {
12053                 fprintf(stderr, "Couldn't find owner root %llu\n",
12054                         key.objectid);
12055                 return PTR_ERR(root);
12056         }
12057
12058         trans = btrfs_start_transaction(root, 1);
12059         if (IS_ERR(trans))
12060                 return PTR_ERR(trans);
12061
12062         btrfs_init_path(&path);
12063         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12064         if (ret) {
12065                 if (ret > 0)
12066                         ret = 0;
12067                 goto out;
12068         }
12069         ret = btrfs_del_item(trans, root, &path);
12070 out:
12071         btrfs_commit_transaction(trans, root);
12072         btrfs_release_path(&path);
12073         return ret;
12074 }
12075
12076 static int zero_log_tree(struct btrfs_root *root)
12077 {
12078         struct btrfs_trans_handle *trans;
12079         int ret;
12080
12081         trans = btrfs_start_transaction(root, 1);
12082         if (IS_ERR(trans)) {
12083                 ret = PTR_ERR(trans);
12084                 return ret;
12085         }
12086         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12087         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12088         ret = btrfs_commit_transaction(trans, root);
12089         return ret;
12090 }
12091
12092 static int populate_csum(struct btrfs_trans_handle *trans,
12093                          struct btrfs_root *csum_root, char *buf, u64 start,
12094                          u64 len)
12095 {
12096         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12097         u64 offset = 0;
12098         u64 sectorsize;
12099         int ret = 0;
12100
12101         while (offset < len) {
12102                 sectorsize = fs_info->sectorsize;
12103                 ret = read_extent_data(fs_info, buf, start + offset,
12104                                        &sectorsize, 0);
12105                 if (ret)
12106                         break;
12107                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12108                                             start + offset, buf, sectorsize);
12109                 if (ret)
12110                         break;
12111                 offset += sectorsize;
12112         }
12113         return ret;
12114 }
12115
12116 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12117                                       struct btrfs_root *csum_root,
12118                                       struct btrfs_root *cur_root)
12119 {
12120         struct btrfs_path path;
12121         struct btrfs_key key;
12122         struct extent_buffer *node;
12123         struct btrfs_file_extent_item *fi;
12124         char *buf = NULL;
12125         u64 start = 0;
12126         u64 len = 0;
12127         int slot = 0;
12128         int ret = 0;
12129
12130         buf = malloc(cur_root->fs_info->sectorsize);
12131         if (!buf)
12132                 return -ENOMEM;
12133
12134         btrfs_init_path(&path);
12135         key.objectid = 0;
12136         key.offset = 0;
12137         key.type = 0;
12138         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12139         if (ret < 0)
12140                 goto out;
12141         /* Iterate all regular file extents and fill its csum */
12142         while (1) {
12143                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12144
12145                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12146                         goto next;
12147                 node = path.nodes[0];
12148                 slot = path.slots[0];
12149                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12150                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12151                         goto next;
12152                 start = btrfs_file_extent_disk_bytenr(node, fi);
12153                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12154
12155                 ret = populate_csum(trans, csum_root, buf, start, len);
12156                 if (ret == -EEXIST)
12157                         ret = 0;
12158                 if (ret < 0)
12159                         goto out;
12160 next:
12161                 /*
12162                  * TODO: if next leaf is corrupted, jump to nearest next valid
12163                  * leaf.
12164                  */
12165                 ret = btrfs_next_item(cur_root, &path);
12166                 if (ret < 0)
12167                         goto out;
12168                 if (ret > 0) {
12169                         ret = 0;
12170                         goto out;
12171                 }
12172         }
12173
12174 out:
12175         btrfs_release_path(&path);
12176         free(buf);
12177         return ret;
12178 }
12179
12180 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12181                                   struct btrfs_root *csum_root)
12182 {
12183         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12184         struct btrfs_path path;
12185         struct btrfs_root *tree_root = fs_info->tree_root;
12186         struct btrfs_root *cur_root;
12187         struct extent_buffer *node;
12188         struct btrfs_key key;
12189         int slot = 0;
12190         int ret = 0;
12191
12192         btrfs_init_path(&path);
12193         key.objectid = BTRFS_FS_TREE_OBJECTID;
12194         key.offset = 0;
12195         key.type = BTRFS_ROOT_ITEM_KEY;
12196         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12197         if (ret < 0)
12198                 goto out;
12199         if (ret > 0) {
12200                 ret = -ENOENT;
12201                 goto out;
12202         }
12203
12204         while (1) {
12205                 node = path.nodes[0];
12206                 slot = path.slots[0];
12207                 btrfs_item_key_to_cpu(node, &key, slot);
12208                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12209                         goto out;
12210                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12211                         goto next;
12212                 if (!is_fstree(key.objectid))
12213                         goto next;
12214                 key.offset = (u64)-1;
12215
12216                 cur_root = btrfs_read_fs_root(fs_info, &key);
12217                 if (IS_ERR(cur_root) || !cur_root) {
12218                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12219                                 key.objectid);
12220                         goto out;
12221                 }
12222                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12223                                 cur_root);
12224                 if (ret < 0)
12225                         goto out;
12226 next:
12227                 ret = btrfs_next_item(tree_root, &path);
12228                 if (ret > 0) {
12229                         ret = 0;
12230                         goto out;
12231                 }
12232                 if (ret < 0)
12233                         goto out;
12234         }
12235
12236 out:
12237         btrfs_release_path(&path);
12238         return ret;
12239 }
12240
12241 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12242                                       struct btrfs_root *csum_root)
12243 {
12244         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12245         struct btrfs_path path;
12246         struct btrfs_extent_item *ei;
12247         struct extent_buffer *leaf;
12248         char *buf;
12249         struct btrfs_key key;
12250         int ret;
12251
12252         btrfs_init_path(&path);
12253         key.objectid = 0;
12254         key.type = BTRFS_EXTENT_ITEM_KEY;
12255         key.offset = 0;
12256         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12257         if (ret < 0) {
12258                 btrfs_release_path(&path);
12259                 return ret;
12260         }
12261
12262         buf = malloc(csum_root->fs_info->sectorsize);
12263         if (!buf) {
12264                 btrfs_release_path(&path);
12265                 return -ENOMEM;
12266         }
12267
12268         while (1) {
12269                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12270                         ret = btrfs_next_leaf(extent_root, &path);
12271                         if (ret < 0)
12272                                 break;
12273                         if (ret) {
12274                                 ret = 0;
12275                                 break;
12276                         }
12277                 }
12278                 leaf = path.nodes[0];
12279
12280                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12281                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12282                         path.slots[0]++;
12283                         continue;
12284                 }
12285
12286                 ei = btrfs_item_ptr(leaf, path.slots[0],
12287                                     struct btrfs_extent_item);
12288                 if (!(btrfs_extent_flags(leaf, ei) &
12289                       BTRFS_EXTENT_FLAG_DATA)) {
12290                         path.slots[0]++;
12291                         continue;
12292                 }
12293
12294                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12295                                     key.offset);
12296                 if (ret)
12297                         break;
12298                 path.slots[0]++;
12299         }
12300
12301         btrfs_release_path(&path);
12302         free(buf);
12303         return ret;
12304 }
12305
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * Re-initializing the extent tree wipes all extent information, so in that
 * case the extent tree cannot serve as the source.  A non-zero
 * @search_fs_tree selects the fs/subvolume trees as the source instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
12322
12323 static void free_roots_info_cache(void)
12324 {
12325         if (!roots_info_cache)
12326                 return;
12327
12328         while (!cache_tree_empty(roots_info_cache)) {
12329                 struct cache_extent *entry;
12330                 struct root_item_info *rii;
12331
12332                 entry = first_cache_extent(roots_info_cache);
12333                 if (!entry)
12334                         break;
12335                 remove_cache_extent(roots_info_cache, entry);
12336                 rii = container_of(entry, struct root_item_info, cache_extent);
12337                 free(rii);
12338         }
12339
12340         free(roots_info_cache);
12341         roots_info_cache = NULL;
12342 }
12343
12344 static int build_roots_info_cache(struct btrfs_fs_info *info)
12345 {
12346         int ret = 0;
12347         struct btrfs_key key;
12348         struct extent_buffer *leaf;
12349         struct btrfs_path path;
12350
12351         if (!roots_info_cache) {
12352                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12353                 if (!roots_info_cache)
12354                         return -ENOMEM;
12355                 cache_tree_init(roots_info_cache);
12356         }
12357
12358         btrfs_init_path(&path);
12359         key.objectid = 0;
12360         key.type = BTRFS_EXTENT_ITEM_KEY;
12361         key.offset = 0;
12362         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12363         if (ret < 0)
12364                 goto out;
12365         leaf = path.nodes[0];
12366
12367         while (1) {
12368                 struct btrfs_key found_key;
12369                 struct btrfs_extent_item *ei;
12370                 struct btrfs_extent_inline_ref *iref;
12371                 int slot = path.slots[0];
12372                 int type;
12373                 u64 flags;
12374                 u64 root_id;
12375                 u8 level;
12376                 struct cache_extent *entry;
12377                 struct root_item_info *rii;
12378
12379                 if (slot >= btrfs_header_nritems(leaf)) {
12380                         ret = btrfs_next_leaf(info->extent_root, &path);
12381                         if (ret < 0) {
12382                                 break;
12383                         } else if (ret) {
12384                                 ret = 0;
12385                                 break;
12386                         }
12387                         leaf = path.nodes[0];
12388                         slot = path.slots[0];
12389                 }
12390
12391                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12392
12393                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12394                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12395                         goto next;
12396
12397                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12398                 flags = btrfs_extent_flags(leaf, ei);
12399
12400                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12401                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12402                         goto next;
12403
12404                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12405                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12406                         level = found_key.offset;
12407                 } else {
12408                         struct btrfs_tree_block_info *binfo;
12409
12410                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12411                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12412                         level = btrfs_tree_block_level(leaf, binfo);
12413                 }
12414
12415                 /*
12416                  * For a root extent, it must be of the following type and the
12417                  * first (and only one) iref in the item.
12418                  */
12419                 type = btrfs_extent_inline_ref_type(leaf, iref);
12420                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12421                         goto next;
12422
12423                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12424                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12425                 if (!entry) {
12426                         rii = malloc(sizeof(struct root_item_info));
12427                         if (!rii) {
12428                                 ret = -ENOMEM;
12429                                 goto out;
12430                         }
12431                         rii->cache_extent.start = root_id;
12432                         rii->cache_extent.size = 1;
12433                         rii->level = (u8)-1;
12434                         entry = &rii->cache_extent;
12435                         ret = insert_cache_extent(roots_info_cache, entry);
12436                         ASSERT(ret == 0);
12437                 } else {
12438                         rii = container_of(entry, struct root_item_info,
12439                                            cache_extent);
12440                 }
12441
12442                 ASSERT(rii->cache_extent.start == root_id);
12443                 ASSERT(rii->cache_extent.size == 1);
12444
12445                 if (level > rii->level || rii->level == (u8)-1) {
12446                         rii->level = level;
12447                         rii->bytenr = found_key.objectid;
12448                         rii->gen = btrfs_extent_generation(leaf, ei);
12449                         rii->node_count = 1;
12450                 } else if (level == rii->level) {
12451                         rii->node_count++;
12452                 }
12453 next:
12454                 path.slots[0]++;
12455         }
12456
12457 out:
12458         btrfs_release_path(&path);
12459
12460         return ret;
12461 }
12462
12463 static int maybe_repair_root_item(struct btrfs_path *path,
12464                                   const struct btrfs_key *root_key,
12465                                   const int read_only_mode)
12466 {
12467         const u64 root_id = root_key->objectid;
12468         struct cache_extent *entry;
12469         struct root_item_info *rii;
12470         struct btrfs_root_item ri;
12471         unsigned long offset;
12472
12473         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12474         if (!entry) {
12475                 fprintf(stderr,
12476                         "Error: could not find extent items for root %llu\n",
12477                         root_key->objectid);
12478                 return -ENOENT;
12479         }
12480
12481         rii = container_of(entry, struct root_item_info, cache_extent);
12482         ASSERT(rii->cache_extent.start == root_id);
12483         ASSERT(rii->cache_extent.size == 1);
12484
12485         if (rii->node_count != 1) {
12486                 fprintf(stderr,
12487                         "Error: could not find btree root extent for root %llu\n",
12488                         root_id);
12489                 return -ENOENT;
12490         }
12491
12492         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12493         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12494
12495         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12496             btrfs_root_level(&ri) != rii->level ||
12497             btrfs_root_generation(&ri) != rii->gen) {
12498
12499                 /*
12500                  * If we're in repair mode but our caller told us to not update
12501                  * the root item, i.e. just check if it needs to be updated, don't
12502                  * print this message, since the caller will call us again shortly
12503                  * for the same root item without read only mode (the caller will
12504                  * open a transaction first).
12505                  */
12506                 if (!(read_only_mode && repair))
12507                         fprintf(stderr,
12508                                 "%sroot item for root %llu,"
12509                                 " current bytenr %llu, current gen %llu, current level %u,"
12510                                 " new bytenr %llu, new gen %llu, new level %u\n",
12511                                 (read_only_mode ? "" : "fixing "),
12512                                 root_id,
12513                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12514                                 btrfs_root_level(&ri),
12515                                 rii->bytenr, rii->gen, rii->level);
12516
12517                 if (btrfs_root_generation(&ri) > rii->gen) {
12518                         fprintf(stderr,
12519                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12520                                 root_id, btrfs_root_generation(&ri), rii->gen);
12521                         return -EINVAL;
12522                 }
12523
12524                 if (!read_only_mode) {
12525                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12526                         btrfs_set_root_level(&ri, rii->level);
12527                         btrfs_set_root_generation(&ri, rii->gen);
12528                         write_extent_buffer(path->nodes[0], &ri,
12529                                             offset, sizeof(ri));
12530                 }
12531
12532                 return 1;
12533         }
12534
12535         return 0;
12536 }
12537
12538 /*
12539  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12540  * caused read-only snapshots to be corrupted if they were created at a moment
12541  * when the source subvolume/snapshot had orphan items. The issue was that the
12542  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12543  * node instead of the post orphan cleanup root node.
12544  * So this function, and its callees, just detects and fixes those cases. Even
12545  * though the regression was for read-only snapshots, this function applies to
12546  * any snapshot/subvolume root.
12547  * This must be run before any other repair code - not doing it so, makes other
12548  * repair code delete or modify backrefs in the extent tree for example, which
12549  * will result in an inconsistent fs after repairing the root items.
12550  */
12551 static int repair_root_items(struct btrfs_fs_info *info)
12552 {
12553         struct btrfs_path path;
12554         struct btrfs_key key;
12555         struct extent_buffer *leaf;
12556         struct btrfs_trans_handle *trans = NULL;
12557         int ret = 0;
12558         int bad_roots = 0;
12559         int need_trans = 0;
12560
12561         btrfs_init_path(&path);
12562
12563         ret = build_roots_info_cache(info);
12564         if (ret)
12565                 goto out;
12566
12567         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12568         key.type = BTRFS_ROOT_ITEM_KEY;
12569         key.offset = 0;
12570
12571 again:
12572         /*
12573          * Avoid opening and committing transactions if a leaf doesn't have
12574          * any root items that need to be fixed, so that we avoid rotating
12575          * backup roots unnecessarily.
12576          */
12577         if (need_trans) {
12578                 trans = btrfs_start_transaction(info->tree_root, 1);
12579                 if (IS_ERR(trans)) {
12580                         ret = PTR_ERR(trans);
12581                         goto out;
12582                 }
12583         }
12584
12585         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12586                                 0, trans ? 1 : 0);
12587         if (ret < 0)
12588                 goto out;
12589         leaf = path.nodes[0];
12590
12591         while (1) {
12592                 struct btrfs_key found_key;
12593
12594                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12595                         int no_more_keys = find_next_key(&path, &key);
12596
12597                         btrfs_release_path(&path);
12598                         if (trans) {
12599                                 ret = btrfs_commit_transaction(trans,
12600                                                                info->tree_root);
12601                                 trans = NULL;
12602                                 if (ret < 0)
12603                                         goto out;
12604                         }
12605                         need_trans = 0;
12606                         if (no_more_keys)
12607                                 break;
12608                         goto again;
12609                 }
12610
12611                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12612
12613                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12614                         goto next;
12615                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12616                         goto next;
12617
12618                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12619                 if (ret < 0)
12620                         goto out;
12621                 if (ret) {
12622                         if (!trans && repair) {
12623                                 need_trans = 1;
12624                                 key = found_key;
12625                                 btrfs_release_path(&path);
12626                                 goto again;
12627                         }
12628                         bad_roots++;
12629                 }
12630 next:
12631                 path.slots[0]++;
12632         }
12633         ret = 0;
12634 out:
12635         free_roots_info_cache();
12636         btrfs_release_path(&path);
12637         if (trans)
12638                 btrfs_commit_transaction(trans, info->tree_root);
12639         if (ret < 0)
12640                 return ret;
12641
12642         return bad_roots;
12643 }
12644
12645 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12646 {
12647         struct btrfs_trans_handle *trans;
12648         struct btrfs_block_group_cache *bg_cache;
12649         u64 current = 0;
12650         int ret = 0;
12651
12652         /* Clear all free space cache inodes and its extent data */
12653         while (1) {
12654                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12655                 if (!bg_cache)
12656                         break;
12657                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12658                 if (ret < 0)
12659                         return ret;
12660                 current = bg_cache->key.objectid + bg_cache->key.offset;
12661         }
12662
12663         /* Don't forget to set cache_generation to -1 */
12664         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12665         if (IS_ERR(trans)) {
12666                 error("failed to update super block cache generation");
12667                 return PTR_ERR(trans);
12668         }
12669         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12670         btrfs_commit_transaction(trans, fs_info->tree_root);
12671
12672         return ret;
12673 }
12674
/*
 * Usage text for "btrfs check", stored as a NULL-terminated list of strings.
 *
 * Layout as written below: the synopsis line, a one-line summary, the longer
 * description, an empty string as separator, then the option lines.  An
 * option whose help text does not fit next to it continues on following
 * entries that start with padding.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12705
12706 int cmd_check(int argc, char **argv)
12707 {
12708         struct cache_tree root_cache;
12709         struct btrfs_root *root;
12710         struct btrfs_fs_info *info;
12711         u64 bytenr = 0;
12712         u64 subvolid = 0;
12713         u64 tree_root_bytenr = 0;
12714         u64 chunk_root_bytenr = 0;
12715         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12716         int ret;
12717         int err = 0;
12718         u64 num;
12719         int init_csum_tree = 0;
12720         int readonly = 0;
12721         int clear_space_cache = 0;
12722         int qgroup_report = 0;
12723         int qgroups_repaired = 0;
12724         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12725
12726         while(1) {
12727                 int c;
12728                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12729                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12730                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12731                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12732                 static const struct option long_options[] = {
12733                         { "super", required_argument, NULL, 's' },
12734                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12735                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12736                         { "init-csum-tree", no_argument, NULL,
12737                                 GETOPT_VAL_INIT_CSUM },
12738                         { "init-extent-tree", no_argument, NULL,
12739                                 GETOPT_VAL_INIT_EXTENT },
12740                         { "check-data-csum", no_argument, NULL,
12741                                 GETOPT_VAL_CHECK_CSUM },
12742                         { "backup", no_argument, NULL, 'b' },
12743                         { "subvol-extents", required_argument, NULL, 'E' },
12744                         { "qgroup-report", no_argument, NULL, 'Q' },
12745                         { "tree-root", required_argument, NULL, 'r' },
12746                         { "chunk-root", required_argument, NULL,
12747                                 GETOPT_VAL_CHUNK_TREE },
12748                         { "progress", no_argument, NULL, 'p' },
12749                         { "mode", required_argument, NULL,
12750                                 GETOPT_VAL_MODE },
12751                         { "clear-space-cache", required_argument, NULL,
12752                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12753                         { NULL, 0, NULL, 0}
12754                 };
12755
12756                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12757                 if (c < 0)
12758                         break;
12759                 switch(c) {
12760                         case 'a': /* ignored */ break;
12761                         case 'b':
12762                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12763                                 break;
12764                         case 's':
12765                                 num = arg_strtou64(optarg);
12766                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12767                                         error(
12768                                         "super mirror should be less than %d",
12769                                                 BTRFS_SUPER_MIRROR_MAX);
12770                                         exit(1);
12771                                 }
12772                                 bytenr = btrfs_sb_offset(((int)num));
12773                                 printf("using SB copy %llu, bytenr %llu\n", num,
12774                                        (unsigned long long)bytenr);
12775                                 break;
12776                         case 'Q':
12777                                 qgroup_report = 1;
12778                                 break;
12779                         case 'E':
12780                                 subvolid = arg_strtou64(optarg);
12781                                 break;
12782                         case 'r':
12783                                 tree_root_bytenr = arg_strtou64(optarg);
12784                                 break;
12785                         case GETOPT_VAL_CHUNK_TREE:
12786                                 chunk_root_bytenr = arg_strtou64(optarg);
12787                                 break;
12788                         case 'p':
12789                                 ctx.progress_enabled = true;
12790                                 break;
12791                         case '?':
12792                         case 'h':
12793                                 usage(cmd_check_usage);
12794                         case GETOPT_VAL_REPAIR:
12795                                 printf("enabling repair mode\n");
12796                                 repair = 1;
12797                                 ctree_flags |= OPEN_CTREE_WRITES;
12798                                 break;
12799                         case GETOPT_VAL_READONLY:
12800                                 readonly = 1;
12801                                 break;
12802                         case GETOPT_VAL_INIT_CSUM:
12803                                 printf("Creating a new CRC tree\n");
12804                                 init_csum_tree = 1;
12805                                 repair = 1;
12806                                 ctree_flags |= OPEN_CTREE_WRITES;
12807                                 break;
12808                         case GETOPT_VAL_INIT_EXTENT:
12809                                 init_extent_tree = 1;
12810                                 ctree_flags |= (OPEN_CTREE_WRITES |
12811                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12812                                 repair = 1;
12813                                 break;
12814                         case GETOPT_VAL_CHECK_CSUM:
12815                                 check_data_csum = 1;
12816                                 break;
12817                         case GETOPT_VAL_MODE:
12818                                 check_mode = parse_check_mode(optarg);
12819                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12820                                         error("unknown mode: %s", optarg);
12821                                         exit(1);
12822                                 }
12823                                 break;
12824                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12825                                 if (strcmp(optarg, "v1") == 0) {
12826                                         clear_space_cache = 1;
12827                                 } else if (strcmp(optarg, "v2") == 0) {
12828                                         clear_space_cache = 2;
12829                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12830                                 } else {
12831                                         error(
12832                 "invalid argument to --clear-space-cache, must be v1 or v2");
12833                                         exit(1);
12834                                 }
12835                                 ctree_flags |= OPEN_CTREE_WRITES;
12836                                 break;
12837                 }
12838         }
12839
12840         if (check_argc_exact(argc - optind, 1))
12841                 usage(cmd_check_usage);
12842
12843         if (ctx.progress_enabled) {
12844                 ctx.tp = TASK_NOTHING;
12845                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12846         }
12847
12848         /* This check is the only reason for --readonly to exist */
12849         if (readonly && repair) {
12850                 error("repair options are not compatible with --readonly");
12851                 exit(1);
12852         }
12853
12854         /*
12855          * Not supported yet
12856          */
12857         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12858                 error("low memory mode doesn't support repair yet");
12859                 exit(1);
12860         }
12861
12862         radix_tree_init();
12863         cache_tree_init(&root_cache);
12864
12865         if((ret = check_mounted(argv[optind])) < 0) {
12866                 error("could not check mount status: %s", strerror(-ret));
12867                 err |= !!ret;
12868                 goto err_out;
12869         } else if(ret) {
12870                 error("%s is currently mounted, aborting", argv[optind]);
12871                 ret = -EBUSY;
12872                 err |= !!ret;
12873                 goto err_out;
12874         }
12875
12876         /* only allow partial opening under repair mode */
12877         if (repair)
12878                 ctree_flags |= OPEN_CTREE_PARTIAL;
12879
12880         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12881                                   chunk_root_bytenr, ctree_flags);
12882         if (!info) {
12883                 error("cannot open file system");
12884                 ret = -EIO;
12885                 err |= !!ret;
12886                 goto err_out;
12887         }
12888
12889         global_info = info;
12890         root = info->fs_root;
12891         if (clear_space_cache == 1) {
12892                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12893                         error(
12894                 "free space cache v2 detected, use --clear-space-cache v2");
12895                         ret = 1;
12896                         goto close_out;
12897                 }
12898                 printf("Clearing free space cache\n");
12899                 ret = clear_free_space_cache(info);
12900                 if (ret) {
12901                         error("failed to clear free space cache");
12902                         ret = 1;
12903                 } else {
12904                         printf("Free space cache cleared\n");
12905                 }
12906                 goto close_out;
12907         } else if (clear_space_cache == 2) {
12908                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12909                         printf("no free space cache v2 to clear\n");
12910                         ret = 0;
12911                         goto close_out;
12912                 }
12913                 printf("Clear free space cache v2\n");
12914                 ret = btrfs_clear_free_space_tree(info);
12915                 if (ret) {
12916                         error("failed to clear free space cache v2: %d", ret);
12917                         ret = 1;
12918                 } else {
12919                         printf("free space cache v2 cleared\n");
12920                 }
12921                 goto close_out;
12922         }
12923
12924         /*
12925          * repair mode will force us to commit transaction which
12926          * will make us fail to load log tree when mounting.
12927          */
12928         if (repair && btrfs_super_log_root(info->super_copy)) {
12929                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12930                 if (!ret) {
12931                         ret = 1;
12932                         err |= !!ret;
12933                         goto close_out;
12934                 }
12935                 ret = zero_log_tree(root);
12936                 err |= !!ret;
12937                 if (ret) {
12938                         error("failed to zero log tree: %d", ret);
12939                         goto close_out;
12940                 }
12941         }
12942
12943         uuid_unparse(info->super_copy->fsid, uuidbuf);
12944         if (qgroup_report) {
12945                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12946                        uuidbuf);
12947                 ret = qgroup_verify_all(info);
12948                 err |= !!ret;
12949                 if (ret == 0)
12950                         report_qgroups(1);
12951                 goto close_out;
12952         }
12953         if (subvolid) {
12954                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12955                        subvolid, argv[optind], uuidbuf);
12956                 ret = print_extent_state(info, subvolid);
12957                 err |= !!ret;
12958                 goto close_out;
12959         }
12960         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12961
12962         if (!extent_buffer_uptodate(info->tree_root->node) ||
12963             !extent_buffer_uptodate(info->dev_root->node) ||
12964             !extent_buffer_uptodate(info->chunk_root->node)) {
12965                 error("critical roots corrupted, unable to check the filesystem");
12966                 err |= !!ret;
12967                 ret = -EIO;
12968                 goto close_out;
12969         }
12970
12971         if (init_extent_tree || init_csum_tree) {
12972                 struct btrfs_trans_handle *trans;
12973
12974                 trans = btrfs_start_transaction(info->extent_root, 0);
12975                 if (IS_ERR(trans)) {
12976                         error("error starting transaction");
12977                         ret = PTR_ERR(trans);
12978                         err |= !!ret;
12979                         goto close_out;
12980                 }
12981
12982                 if (init_extent_tree) {
12983                         printf("Creating a new extent tree\n");
12984                         ret = reinit_extent_tree(trans, info);
12985                         err |= !!ret;
12986                         if (ret)
12987                                 goto close_out;
12988                 }
12989
12990                 if (init_csum_tree) {
12991                         printf("Reinitialize checksum tree\n");
12992                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12993                         if (ret) {
12994                                 error("checksum tree initialization failed: %d",
12995                                                 ret);
12996                                 ret = -EIO;
12997                                 err |= !!ret;
12998                                 goto close_out;
12999                         }
13000
13001                         ret = fill_csum_tree(trans, info->csum_root,
13002                                              init_extent_tree);
13003                         err |= !!ret;
13004                         if (ret) {
13005                                 error("checksum tree refilling failed: %d", ret);
13006                                 return -EIO;
13007                         }
13008                 }
13009                 /*
13010                  * Ok now we commit and run the normal fsck, which will add
13011                  * extent entries for all of the items it finds.
13012                  */
13013                 ret = btrfs_commit_transaction(trans, info->extent_root);
13014                 err |= !!ret;
13015                 if (ret)
13016                         goto close_out;
13017         }
13018         if (!extent_buffer_uptodate(info->extent_root->node)) {
13019                 error("critical: extent_root, unable to check the filesystem");
13020                 ret = -EIO;
13021                 err |= !!ret;
13022                 goto close_out;
13023         }
13024         if (!extent_buffer_uptodate(info->csum_root->node)) {
13025                 error("critical: csum_root, unable to check the filesystem");
13026                 ret = -EIO;
13027                 err |= !!ret;
13028                 goto close_out;
13029         }
13030
13031         if (!ctx.progress_enabled)
13032                 fprintf(stderr, "checking extents\n");
13033         if (check_mode == CHECK_MODE_LOWMEM)
13034                 ret = check_chunks_and_extents_v2(root);
13035         else
13036                 ret = check_chunks_and_extents(root);
13037         err |= !!ret;
13038         if (ret)
13039                 error(
13040                 "errors found in extent allocation tree or chunk allocation");
13041
13042         ret = repair_root_items(info);
13043         err |= !!ret;
13044         if (ret < 0) {
13045                 error("failed to repair root items: %s", strerror(-ret));
13046                 goto close_out;
13047         }
13048         if (repair) {
13049                 fprintf(stderr, "Fixed %d roots.\n", ret);
13050                 ret = 0;
13051         } else if (ret > 0) {
13052                 fprintf(stderr,
13053                        "Found %d roots with an outdated root item.\n",
13054                        ret);
13055                 fprintf(stderr,
13056                         "Please run a filesystem check with the option --repair to fix them.\n");
13057                 ret = 1;
13058                 err |= !!ret;
13059                 goto close_out;
13060         }
13061
13062         if (!ctx.progress_enabled) {
13063                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13064                         fprintf(stderr, "checking free space tree\n");
13065                 else
13066                         fprintf(stderr, "checking free space cache\n");
13067         }
13068         ret = check_space_cache(root);
13069         err |= !!ret;
13070         if (ret) {
13071                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13072                         error("errors found in free space tree");
13073                 else
13074                         error("errors found in free space cache");
13075                 goto out;
13076         }
13077
13078         /*
13079          * We used to have to have these hole extents in between our real
13080          * extents so if we don't have this flag set we need to make sure there
13081          * are no gaps in the file extents for inodes, otherwise we can just
13082          * ignore it when this happens.
13083          */
13084         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13085         if (!ctx.progress_enabled)
13086                 fprintf(stderr, "checking fs roots\n");
13087         if (check_mode == CHECK_MODE_LOWMEM)
13088                 ret = check_fs_roots_v2(root->fs_info);
13089         else
13090                 ret = check_fs_roots(root, &root_cache);
13091         err |= !!ret;
13092         if (ret) {
13093                 error("errors found in fs roots");
13094                 goto out;
13095         }
13096
13097         fprintf(stderr, "checking csums\n");
13098         ret = check_csums(root);
13099         err |= !!ret;
13100         if (ret) {
13101                 error("errors found in csum tree");
13102                 goto out;
13103         }
13104
13105         fprintf(stderr, "checking root refs\n");
13106         /* For low memory mode, check_fs_roots_v2 handles root refs */
13107         if (check_mode != CHECK_MODE_LOWMEM) {
13108                 ret = check_root_refs(root, &root_cache);
13109                 err |= !!ret;
13110                 if (ret) {
13111                         error("errors found in root refs");
13112                         goto out;
13113                 }
13114         }
13115
13116         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13117                 struct extent_buffer *eb;
13118
13119                 eb = list_first_entry(&root->fs_info->recow_ebs,
13120                                       struct extent_buffer, recow);
13121                 list_del_init(&eb->recow);
13122                 ret = recow_extent_buffer(root, eb);
13123                 err |= !!ret;
13124                 if (ret) {
13125                         error("fails to fix transid errors");
13126                         break;
13127                 }
13128         }
13129
13130         while (!list_empty(&delete_items)) {
13131                 struct bad_item *bad;
13132
13133                 bad = list_first_entry(&delete_items, struct bad_item, list);
13134                 list_del_init(&bad->list);
13135                 if (repair) {
13136                         ret = delete_bad_item(root, bad);
13137                         err |= !!ret;
13138                 }
13139                 free(bad);
13140         }
13141
13142         if (info->quota_enabled) {
13143                 fprintf(stderr, "checking quota groups\n");
13144                 ret = qgroup_verify_all(info);
13145                 err |= !!ret;
13146                 if (ret) {
13147                         error("failed to check quota groups");
13148                         goto out;
13149                 }
13150                 report_qgroups(0);
13151                 ret = repair_qgroups(info, &qgroups_repaired);
13152                 err |= !!ret;
13153                 if (err) {
13154                         error("failed to repair quota groups");
13155                         goto out;
13156                 }
13157                 ret = 0;
13158         }
13159
13160         if (!list_empty(&root->fs_info->recow_ebs)) {
13161                 error("transid errors in file system");
13162                 ret = 1;
13163                 err |= !!ret;
13164         }
13165 out:
13166         if (found_old_backref) { /*
13167                  * there was a disk format change when mixed
13168                  * backref was in testing tree. The old format
13169                  * existed about one week.
13170                  */
13171                 printf("\n * Found old mixed backref format. "
13172                        "The old format is not supported! *"
13173                        "\n * Please mount the FS in readonly mode, "
13174                        "backup data and re-format the FS. *\n\n");
13175                 err |= 1;
13176         }
13177         printf("found %llu bytes used, ",
13178                (unsigned long long)bytes_used);
13179         if (err)
13180                 printf("error(s) found\n");
13181         else
13182                 printf("no error found\n");
13183         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13184         printf("total tree bytes: %llu\n",
13185                (unsigned long long)total_btree_bytes);
13186         printf("total fs tree bytes: %llu\n",
13187                (unsigned long long)total_fs_tree_bytes);
13188         printf("total extent tree bytes: %llu\n",
13189                (unsigned long long)total_extent_tree_bytes);
13190         printf("btree space waste bytes: %llu\n",
13191                (unsigned long long)btree_space_waste);
13192         printf("file data blocks allocated: %llu\n referenced %llu\n",
13193                 (unsigned long long)data_bytes_allocated,
13194                 (unsigned long long)data_bytes_referenced);
13195
13196         free_qgroup_counts();
13197         free_root_recs_tree(&root_cache);
13198 close_out:
13199         close_ctree(root);
13200 err_out:
13201         if (ctx.progress_enabled)
13202                 task_deinit(ctx.info);
13203
13204         return err;
13205 }