6911064b93f79c18e406e6af4588677f54389097
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Stage of the check reported by the progress indicator.  The value is used
 * by print_status_check() as an index into its table of stage names, which
 * has no entry for TASK_NOTHING.
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* have to be the last element */
52 };
53
/* Context passed (as void *) to print_status_check(). */
54 struct task_ctx {
           /* NOTE(review): presumably set when progress output was requested - confirm */
55         int progress_enabled;
           /* stage whose name is printed */
56         enum task_position tp;
57
           /* handed to task_period_start()/task_period_wait() */
58         struct task_info *info;
59 };
60
/*
 * File-scope state of the checker.
 * NOTE(review): the names suggest running byte totals, pending work lists and
 * option switches; the users are not next to these definitions, so confirm
 * each one at its use site.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation to run.  parse_check_mode() maps "lowmem" to
 * CHECK_MODE_LOWMEM, "orig"/"original" to CHECK_MODE_ORIGINAL and anything
 * else to CHECK_MODE_UNKNOWN.  CHECK_MODE_DEFAULT is an alias of
 * CHECK_MODE_ORIGINAL.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
83 };
84
/* Mode in effect; starts out as the default (original) mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.  It is embedded as the first member
 * ("node") of struct data_backref and struct tree_backref.
 */
87 struct extent_backref {
           /* link used by to_extent_backref() to recover the structure */
88         struct list_head list;
           /* NOTE(review): presumably selects data_backref vs tree_backref - confirm */
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
/*
 * Back reference carrying data-extent details.  The generic header comes
 * first so to_data_backref() can convert from struct extent_backref.
 */
101 struct data_backref {
102         struct extent_backref node;
            /* parent and root share storage; only one of them is meaningful */
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/*
 * Error bits, one per problem class; the meaning of each bit is given next
 * to it.  Bit 0 is not assigned in this list.
 */
116 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
130 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 /*
140  * Much like data_backref, but without the undetermined members and
141  * linked through a list_head.
142  * During extent scan, it is stored in root->orphan_data_extent.
143  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
144  */
145 struct orphan_data_extent {
146         struct list_head list;
147         u64 root;
            /* printed as "inode" by print_orphan_data_extents() */
148         u64 objectid;
149         u64 offset;
150         u64 disk_bytenr;
151         u64 disk_len;
152 };
153
/*
 * Back reference without data-extent details.  The generic header comes
 * first so to_tree_backref() can convert from struct extent_backref.
 */
154 struct tree_backref {
155         struct extent_backref node;
            /* parent and root share storage; only one of them is meaningful */
156         union {
157                 u64 parent;
158                 u64 root;
159         };
160 };
161
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
163 {
164         return container_of(back, struct tree_backref, node);
165 }
166
167 /* Explicit initialization for extent_record::flag_block_full_backref */
168 enum { FLAG_UNSET = 2 };
169
/*
 * Per-extent bookkeeping.  The record can be linked through ->list (see
 * to_extent_record()) and through the embedded cache_extent.
 */
170 struct extent_record {
171         struct list_head backrefs;
172         struct list_head dups;
173         struct list_head list;
174         struct cache_extent cache;
175         struct btrfs_disk_key parent_key;
176         u64 start;
177         u64 max_size;
178         u64 nr;
179         u64 refs;
180         u64 extent_item_refs;
181         u64 generation;
182         u64 parent_generation;
183         u64 info_objectid;
184         u32 num_duplicates;
185         u8 info_level;
            /* two bits wide so that it can hold FLAG_UNSET (2) besides 0 and 1 */
186         unsigned int flag_block_full_backref:2;
187         unsigned int found_rec:1;
188         unsigned int content_checked:1;
189         unsigned int owner_ref_checked:1;
190         unsigned int is_root:1;
191         unsigned int metadata:1;
192         unsigned int bad_full_backref:1;
193         unsigned int crossing_stripes:1;
194         unsigned int wrong_chunk_type:1;
195 };
196
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
198 {
199         return container_of(entry, struct extent_record, list);
200 }
201
/*
 * One name referring to an inode, kept on inode_record::backrefs.
 * The name is stored inline after the fixed part; clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
202 struct inode_backref {
203         struct list_head list;
204         unsigned int found_dir_item:1;
205         unsigned int found_dir_index:1;
206         unsigned int found_inode_ref:1;
207         u8 filetype;
208         u8 ref_type;
            /* NOTE(review): presumably a mask of REF_ERR_* bits - confirm */
209         int errors;
210         u64 dir;
211         u64 index;
212         u16 namelen;
            /* zero-length trailing array holding the name bytes */
213         char name[0];
214 };
215
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
217 {
218         return list_entry(entry, struct inode_backref, list);
219 }
220
/*
 * List-linkable record of a tree root.
 * NOTE(review): the fields look like a copy of the interesting parts of a
 * root item (location, level, drop progress) - confirm where it is filled.
 */
221 struct root_item_record {
222         struct list_head list;
223         u64 objectid;
224         u64 bytenr;
225         u64 last_snapshot;
226         u8 level;
227         u8 drop_level;
228         struct btrfs_key drop_key;
229 };
230
/*
 * Reference error bits.  print_ref_error() turns a mask of these into text,
 * in ascending bit order.
 */
231 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
232 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
233 #define REF_ERR_NO_INODE_REF            (1 << 2)
234 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
235 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
236 #define REF_ERR_DUP_INODE_REF           (1 << 5)
237 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
238 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
239 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
240 #define REF_ERR_NO_ROOT_REF             (1 << 9)
241 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
242 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
243 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
244
/*
 * A byte range [start, start + len) kept as a node of an rb-tree of holes
 * (see add_file_extent_hole() and del_file_extent_hole()).
 */
245 struct file_extent_hole {
246         struct rb_node node;
247         u64 start;
248         u64 len;
249 };
250
/*
 * Everything collected about one inode.  Records are reference counted:
 * get_inode_rec() clones a record with refs > 1 before handing it out for
 * modification, and free_inode_rec() releases it when refs drops to zero.
 */
251 struct inode_record {
            /* list of struct inode_backref */
252         struct list_head backrefs;
253         unsigned int checked:1;
254         unsigned int merging:1;
255         unsigned int found_inode_item:1;
256         unsigned int found_dir_item:1;
257         unsigned int found_file_extent:1;
258         unsigned int found_csum_item:1;
259         unsigned int some_csum_missing:1;
260         unsigned int nodatasum:1;
            /* mask of I_ERR_* bits, decoded by print_inode_error() */
261         int errors;
262
263         u64 ino;
264         u32 nlink;
265         u32 imode;
266         u64 isize;
267         u64 nbytes;
268
269         u32 found_link;
270         u64 found_size;
            /* set to (u64)-1 when get_inode_rec() creates the record */
271         u64 extent_start;
272         u64 extent_end;
            /* rb-tree of struct file_extent_hole */
273         struct rb_root holes;
            /* list of struct orphan_data_extent */
274         struct list_head orphan_extents;
275
276         u32 refs;
277 };
278
/*
 * Bits of inode_record::errors.  print_inode_error() prints the mask in hex
 * followed by one text fragment per set bit, so the hex values of the bits
 * most often looked up are noted below.
 */
279 #define I_ERR_NO_INODE_ITEM             (1 << 0)
280 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
281 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
282 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
283 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
284 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
285 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
286 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
287 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
288 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
289 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
290 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
291 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
292 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
293 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
294
/*
 * One reference to a tree root, kept on root_record::backrefs.  As with
 * struct inode_backref the name is stored inline after the fixed part.
 */
295 struct root_backref {
296         struct list_head list;
297         unsigned int found_dir_item:1;
298         unsigned int found_dir_index:1;
299         unsigned int found_back_ref:1;
300         unsigned int found_forward_ref:1;
301         unsigned int reachable:1;
            /* NOTE(review): presumably a mask of REF_ERR_* bits - confirm */
302         int errors;
303         u64 ref_root;
304         u64 dir;
305         u64 index;
306         u16 namelen;
            /* zero-length trailing array holding the name bytes */
307         char name[0];
308 };
309
310 static inline struct root_backref* to_root_backref(struct list_head *entry)
311 {
312         return list_entry(entry, struct root_backref, list);
313 }
314
/* Per-root bookkeeping, linkable into a cache tree through ->cache. */
315 struct root_record {
            /* list of struct root_backref */
316         struct list_head backrefs;
317         struct cache_extent cache;
318         unsigned int found_root_item:1;
319         u64 objectid;
320         u32 found_ref;
321 };
322
/*
 * Cache-tree entry that carries an opaque pointer.  get_inode_rec() uses it
 * to map an inode number (cache.start, size 1) to its struct inode_record.
 */
323 struct ptr_node {
324         struct cache_extent cache;
325         void *data;
326 };
327
/*
 * Cache-tree entry holding its own root and inode caches plus a reference
 * count.
 * NOTE(review): presumably one per tree block shared between roots (see
 * walk_control::shared) - confirm.
 */
328 struct shared_node {
329         struct cache_extent cache;
330         struct cache_tree root_cache;
331         struct cache_tree inode_cache;
332         struct inode_record *current;
333         u32 refs;
334 };
335
/* A start/size pair; NOTE(review): presumably a tree block location - confirm. */
336 struct block_info {
337         u64 start;
338         u32 size;
339 };
340
/*
 * State of a tree walk: a cache tree of shared nodes and one shared_node
 * slot per tree level (BTRFS_MAX_LEVEL of them).
 * NOTE(review): active_node and root_level look like level indices into
 * nodes[] - confirm.
 */
341 struct walk_control {
342         struct cache_tree shared;
343         struct shared_node *nodes[BTRFS_MAX_LEVEL];
344         int active_node;
345         int root_level;
346 };
347
/* List entry naming an item by key and by the id of the root it belongs to. */
348 struct bad_item {
349         struct btrfs_key key;
350         u64 root_id;
351         struct list_head list;
352 };
353
/*
 * List entry describing one candidate (bytenr, bytes) for an extent.
 * NOTE(review): count/broken presumably tally how many references agree or
 * are damaged - confirm.
 */
354 struct extent_entry {
355         u64 bytenr;
356         u64 bytes;
357         int count;
358         int broken;
359         struct list_head list;
360 };
361
/* Facts about a tree root, linkable into a cache tree via ->cache_extent. */
362 struct root_item_info {
363         /* level of the root */
364         u8 level;
365         /* number of nodes at this level, must be 1 for a root */
366         int node_count;
367         u64 bytenr;
368         u64 gen;
369         struct cache_extent cache_extent;
370 };
371
372 /*
373  * Error bit for low memory mode check.
374  *
375  * Currently no caller cares about it yet.  Just internal use for error
376  * classification.
 *
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * (1 << 4), so the two conditions cannot be told apart in a mask - confirm
 * whether that is intended.
377  */
378 #define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
379 #define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
380 #define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
381 #define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
382 #define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
383 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
384 #define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
385 #define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
386 #define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
387 #define CHUNK_TYPE_MISMATCH     (1 << 8)
388
389 static void *print_status_check(void *p)
390 {
391         struct task_ctx *priv = p;
392         const char work_indicator[] = { '.', 'o', 'O', 'o' };
393         uint32_t count = 0;
394         static char *task_position_string[] = {
395                 "checking extents",
396                 "checking free space cache",
397                 "checking fs roots",
398         };
399
400         task_period_start(priv->info, 1000 /* 1s */);
401
402         if (priv->tp == TASK_NOTHING)
403                 return NULL;
404
405         while (1) {
406                 printf("%s [%c]\r", task_position_string[priv->tp],
407                                 work_indicator[count % 4]);
408                 count++;
409                 fflush(stdout);
410                 task_period_wait(priv->info);
411         }
412         return NULL;
413 }
414
/*
 * Terminate the in-place status line: write a newline to stdout and flush.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
422
423 static enum btrfs_check_mode parse_check_mode(const char *str)
424 {
425         if (strcmp(str, "lowmem") == 0)
426                 return CHECK_MODE_LOWMEM;
427         if (strcmp(str, "orig") == 0)
428                 return CHECK_MODE_ORIGINAL;
429         if (strcmp(str, "original") == 0)
430                 return CHECK_MODE_ORIGINAL;
431
432         return CHECK_MODE_UNKNOWN;
433 }
434
435 /* Compatible function to allow reuse of old codes */
436 static u64 first_extent_gap(struct rb_root *holes)
437 {
438         struct file_extent_hole *hole;
439
440         if (RB_EMPTY_ROOT(holes))
441                 return (u64)-1;
442
443         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
444         return hole->start;
445 }
446
447 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
448 {
449         struct file_extent_hole *hole1;
450         struct file_extent_hole *hole2;
451
452         hole1 = rb_entry(node1, struct file_extent_hole, node);
453         hole2 = rb_entry(node2, struct file_extent_hole, node);
454
455         if (hole1->start > hole2->start)
456                 return -1;
457         if (hole1->start < hole2->start)
458                 return 1;
459         /* Now hole1->start == hole2->start */
460         if (hole1->len >= hole2->len)
461                 /*
462                  * Hole 1 will be merge center
463                  * Same hole will be merged later
464                  */
465                 return -1;
466         /* Hole 2 will be merge center */
467         return 1;
468 }
469
470 /*
471  * Add a hole to the record
472  *
473  * This will do hole merge for copy_file_extent_holes(),
474  * which will ensure there won't be continuous holes.
475  */
476 static int add_file_extent_hole(struct rb_root *holes,
477                                 u64 start, u64 len)
478 {
479         struct file_extent_hole *hole;
480         struct file_extent_hole *prev = NULL;
481         struct file_extent_hole *next = NULL;
482
483         hole = malloc(sizeof(*hole));
484         if (!hole)
485                 return -ENOMEM;
486         hole->start = start;
487         hole->len = len;
488         /* Since compare will not return 0, no -EEXIST will happen */
489         rb_insert(holes, &hole->node, compare_hole);
490
491         /* simple merge with previous hole */
492         if (rb_prev(&hole->node))
493                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
494                                 node);
495         if (prev && prev->start + prev->len >= hole->start) {
496                 hole->len = hole->start + hole->len - prev->start;
497                 hole->start = prev->start;
498                 rb_erase(&prev->node, holes);
499                 free(prev);
500                 prev = NULL;
501         }
502
503         /* iterate merge with next holes */
504         while (1) {
505                 if (!rb_next(&hole->node))
506                         break;
507                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
508                                         node);
509                 if (hole->start + hole->len >= next->start) {
510                         if (hole->start + hole->len <= next->start + next->len)
511                                 hole->len = next->start + next->len -
512                                             hole->start;
513                         rb_erase(&next->node, holes);
514                         free(next);
515                         next = NULL;
516                 } else
517                         break;
518         }
519         return 0;
520 }
521
522 static int compare_hole_range(struct rb_node *node, void *data)
523 {
524         struct file_extent_hole *hole;
525         u64 start;
526
527         hole = (struct file_extent_hole *)data;
528         start = hole->start;
529
530         hole = rb_entry(node, struct file_extent_hole, node);
531         if (start < hole->start)
532                 return -1;
533         if (start >= hole->start && start < hole->start + hole->len)
534                 return 0;
535         return 1;
536 }
537
538 /*
539  * Delete a hole in the record
540  *
541  * This will do the hole split and is much restrict than add.
542  */
543 static int del_file_extent_hole(struct rb_root *holes,
544                                 u64 start, u64 len)
545 {
546         struct file_extent_hole *hole;
547         struct file_extent_hole tmp;
548         u64 prev_start = 0;
549         u64 prev_len = 0;
550         u64 next_start = 0;
551         u64 next_len = 0;
552         struct rb_node *node;
553         int have_prev = 0;
554         int have_next = 0;
555         int ret = 0;
556
557         tmp.start = start;
558         tmp.len = len;
559         node = rb_search(holes, &tmp, compare_hole_range, NULL);
560         if (!node)
561                 return -EEXIST;
562         hole = rb_entry(node, struct file_extent_hole, node);
563         if (start + len > hole->start + hole->len)
564                 return -EEXIST;
565
566         /*
567          * Now there will be no overlap, delete the hole and re-add the
568          * split(s) if they exists.
569          */
570         if (start > hole->start) {
571                 prev_start = hole->start;
572                 prev_len = start - hole->start;
573                 have_prev = 1;
574         }
575         if (hole->start + hole->len > start + len) {
576                 next_start = start + len;
577                 next_len = hole->start + hole->len - start - len;
578                 have_next = 1;
579         }
580         rb_erase(node, holes);
581         free(hole);
582         if (have_prev) {
583                 ret = add_file_extent_hole(holes, prev_start, prev_len);
584                 if (ret < 0)
585                         return ret;
586         }
587         if (have_next) {
588                 ret = add_file_extent_hole(holes, next_start, next_len);
589                 if (ret < 0)
590                         return ret;
591         }
592         return 0;
593 }
594
595 static int copy_file_extent_holes(struct rb_root *dst,
596                                   struct rb_root *src)
597 {
598         struct file_extent_hole *hole;
599         struct rb_node *node;
600         int ret = 0;
601
602         node = rb_first(src);
603         while (node) {
604                 hole = rb_entry(node, struct file_extent_hole, node);
605                 ret = add_file_extent_hole(dst, hole->start, hole->len);
606                 if (ret)
607                         break;
608                 node = rb_next(node);
609         }
610         return ret;
611 }
612
613 static void free_file_extent_holes(struct rb_root *holes)
614 {
615         struct rb_node *node;
616         struct file_extent_hole *hole;
617
618         node = rb_first(holes);
619         while (node) {
620                 hole = rb_entry(node, struct file_extent_hole, node);
621                 rb_erase(node, holes);
622                 free(hole);
623                 node = rb_first(holes);
624         }
625 }
626
627 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
628
629 static void record_root_in_trans(struct btrfs_trans_handle *trans,
630                                  struct btrfs_root *root)
631 {
632         if (root->last_trans != trans->transid) {
633                 root->track_dirty = 1;
634                 root->last_trans = trans->transid;
635                 root->commit_root = root->node;
636                 extent_buffer_get(root->node);
637         }
638 }
639
640 static u8 imode_to_type(u32 imode)
641 {
642 #define S_SHIFT 12
643         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
644                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
645                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
646                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
647                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
648                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
649                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
650                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
651         };
652
653         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
654 #undef S_SHIFT
655 }
656
657 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
658 {
659         struct device_record *rec1;
660         struct device_record *rec2;
661
662         rec1 = rb_entry(node1, struct device_record, node);
663         rec2 = rb_entry(node2, struct device_record, node);
664         if (rec1->devid > rec2->devid)
665                 return -1;
666         else if (rec1->devid < rec2->devid)
667                 return 1;
668         else
669                 return 0;
670 }
671
672 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
673 {
674         struct inode_record *rec;
675         struct inode_backref *backref;
676         struct inode_backref *orig;
677         struct inode_backref *tmp;
678         struct orphan_data_extent *src_orphan;
679         struct orphan_data_extent *dst_orphan;
680         struct rb_node *rb;
681         size_t size;
682         int ret;
683
684         rec = malloc(sizeof(*rec));
685         if (!rec)
686                 return ERR_PTR(-ENOMEM);
687         memcpy(rec, orig_rec, sizeof(*rec));
688         rec->refs = 1;
689         INIT_LIST_HEAD(&rec->backrefs);
690         INIT_LIST_HEAD(&rec->orphan_extents);
691         rec->holes = RB_ROOT;
692
693         list_for_each_entry(orig, &orig_rec->backrefs, list) {
694                 size = sizeof(*orig) + orig->namelen + 1;
695                 backref = malloc(size);
696                 if (!backref) {
697                         ret = -ENOMEM;
698                         goto cleanup;
699                 }
700                 memcpy(backref, orig, size);
701                 list_add_tail(&backref->list, &rec->backrefs);
702         }
703         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
704                 dst_orphan = malloc(sizeof(*dst_orphan));
705                 if (!dst_orphan) {
706                         ret = -ENOMEM;
707                         goto cleanup;
708                 }
709                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
710                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
711         }
712         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
713         if (ret < 0)
714                 goto cleanup_rb;
715
716         return rec;
717
718 cleanup_rb:
719         rb = rb_first(&rec->holes);
720         while (rb) {
721                 struct file_extent_hole *hole;
722
723                 hole = rb_entry(rb, struct file_extent_hole, node);
724                 rb = rb_next(rb);
725                 free(hole);
726         }
727
728 cleanup:
729         if (!list_empty(&rec->backrefs))
730                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
731                         list_del(&orig->list);
732                         free(orig);
733                 }
734
735         if (!list_empty(&rec->orphan_extents))
736                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
737                         list_del(&orig->list);
738                         free(orig);
739                 }
740
741         free(rec);
742
743         return ERR_PTR(ret);
744 }
745
746 static void print_orphan_data_extents(struct list_head *orphan_extents,
747                                       u64 objectid)
748 {
749         struct orphan_data_extent *orphan;
750
751         if (list_empty(orphan_extents))
752                 return;
753         printf("The following data extent is lost in tree %llu:\n",
754                objectid);
755         list_for_each_entry(orphan, orphan_extents, list) {
756                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
757                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
758                        orphan->disk_len);
759         }
760 }
761
762 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
763 {
764         u64 root_objectid = root->root_key.objectid;
765         int errors = rec->errors;
766
767         if (!errors)
768                 return;
769         /* reloc root errors, we print its corresponding fs root objectid*/
770         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
771                 root_objectid = root->root_key.offset;
772                 fprintf(stderr, "reloc");
773         }
774         fprintf(stderr, "root %llu inode %llu errors %x",
775                 (unsigned long long) root_objectid,
776                 (unsigned long long) rec->ino, rec->errors);
777
778         if (errors & I_ERR_NO_INODE_ITEM)
779                 fprintf(stderr, ", no inode item");
780         if (errors & I_ERR_NO_ORPHAN_ITEM)
781                 fprintf(stderr, ", no orphan item");
782         if (errors & I_ERR_DUP_INODE_ITEM)
783                 fprintf(stderr, ", dup inode item");
784         if (errors & I_ERR_DUP_DIR_INDEX)
785                 fprintf(stderr, ", dup dir index");
786         if (errors & I_ERR_ODD_DIR_ITEM)
787                 fprintf(stderr, ", odd dir item");
788         if (errors & I_ERR_ODD_FILE_EXTENT)
789                 fprintf(stderr, ", odd file extent");
790         if (errors & I_ERR_BAD_FILE_EXTENT)
791                 fprintf(stderr, ", bad file extent");
792         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
793                 fprintf(stderr, ", file extent overlap");
794         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
795                 fprintf(stderr, ", file extent discount");
796         if (errors & I_ERR_DIR_ISIZE_WRONG)
797                 fprintf(stderr, ", dir isize wrong");
798         if (errors & I_ERR_FILE_NBYTES_WRONG)
799                 fprintf(stderr, ", nbytes wrong");
800         if (errors & I_ERR_ODD_CSUM_ITEM)
801                 fprintf(stderr, ", odd csum item");
802         if (errors & I_ERR_SOME_CSUM_MISSING)
803                 fprintf(stderr, ", some csum missing");
804         if (errors & I_ERR_LINK_COUNT_WRONG)
805                 fprintf(stderr, ", link count wrong");
806         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
807                 fprintf(stderr, ", orphan file extent");
808         fprintf(stderr, "\n");
809         /* Print the orphan extents if needed */
810         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
811                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
812
813         /* Print the holes if needed */
814         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
815                 struct file_extent_hole *hole;
816                 struct rb_node *node;
817                 int found = 0;
818
819                 node = rb_first(&rec->holes);
820                 fprintf(stderr, "Found file extent holes:\n");
821                 while (node) {
822                         found = 1;
823                         hole = rb_entry(node, struct file_extent_hole, node);
824                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
825                                 hole->start, hole->len);
826                         node = rb_next(node);
827                 }
828                 if (!found)
829                         fprintf(stderr, "\tstart: 0, len: %llu\n",
830                                 round_up(rec->isize,
831                                          root->fs_info->sectorsize));
832         }
833 }
834
835 static void print_ref_error(int errors)
836 {
837         if (errors & REF_ERR_NO_DIR_ITEM)
838                 fprintf(stderr, ", no dir item");
839         if (errors & REF_ERR_NO_DIR_INDEX)
840                 fprintf(stderr, ", no dir index");
841         if (errors & REF_ERR_NO_INODE_REF)
842                 fprintf(stderr, ", no inode ref");
843         if (errors & REF_ERR_DUP_DIR_ITEM)
844                 fprintf(stderr, ", dup dir item");
845         if (errors & REF_ERR_DUP_DIR_INDEX)
846                 fprintf(stderr, ", dup dir index");
847         if (errors & REF_ERR_DUP_INODE_REF)
848                 fprintf(stderr, ", dup inode ref");
849         if (errors & REF_ERR_INDEX_UNMATCH)
850                 fprintf(stderr, ", index mismatch");
851         if (errors & REF_ERR_FILETYPE_UNMATCH)
852                 fprintf(stderr, ", filetype mismatch");
853         if (errors & REF_ERR_NAME_TOO_LONG)
854                 fprintf(stderr, ", name too long");
855         if (errors & REF_ERR_NO_ROOT_REF)
856                 fprintf(stderr, ", no root ref");
857         if (errors & REF_ERR_NO_ROOT_BACKREF)
858                 fprintf(stderr, ", no root backref");
859         if (errors & REF_ERR_DUP_ROOT_REF)
860                 fprintf(stderr, ", dup root ref");
861         if (errors & REF_ERR_DUP_ROOT_BACKREF)
862                 fprintf(stderr, ", dup root backref");
863         fprintf(stderr, "\n");
864 }
865
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
867                                           u64 ino, int mod)
868 {
869         struct ptr_node *node;
870         struct cache_extent *cache;
871         struct inode_record *rec = NULL;
872         int ret;
873
874         cache = lookup_cache_extent(inode_cache, ino, 1);
875         if (cache) {
876                 node = container_of(cache, struct ptr_node, cache);
877                 rec = node->data;
878                 if (mod && rec->refs > 1) {
879                         node->data = clone_inode_rec(rec);
880                         if (IS_ERR(node->data))
881                                 return node->data;
882                         rec->refs--;
883                         rec = node->data;
884                 }
885         } else if (mod) {
886                 rec = calloc(1, sizeof(*rec));
887                 if (!rec)
888                         return ERR_PTR(-ENOMEM);
889                 rec->ino = ino;
890                 rec->extent_start = (u64)-1;
891                 rec->refs = 1;
892                 INIT_LIST_HEAD(&rec->backrefs);
893                 INIT_LIST_HEAD(&rec->orphan_extents);
894                 rec->holes = RB_ROOT;
895
896                 node = malloc(sizeof(*node));
897                 if (!node) {
898                         free(rec);
899                         return ERR_PTR(-ENOMEM);
900                 }
901                 node->cache.start = ino;
902                 node->cache.size = 1;
903                 node->data = rec;
904
905                 if (ino == BTRFS_FREE_INO_OBJECTID)
906                         rec->found_link = 1;
907
908                 ret = insert_cache_extent(inode_cache, &node->cache);
909                 if (ret)
910                         return ERR_PTR(-EEXIST);
911         }
912         return rec;
913 }
914
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
916 {
917         struct orphan_data_extent *orphan;
918
919         while (!list_empty(orphan_extents)) {
920                 orphan = list_entry(orphan_extents->next,
921                                     struct orphan_data_extent, list);
922                 list_del(&orphan->list);
923                 free(orphan);
924         }
925 }
926
927 static void free_inode_rec(struct inode_record *rec)
928 {
929         struct inode_backref *backref;
930
931         if (--rec->refs > 0)
932                 return;
933
934         while (!list_empty(&rec->backrefs)) {
935                 backref = to_inode_backref(rec->backrefs.next);
936                 list_del(&backref->list);
937                 free(backref);
938         }
939         free_orphan_data_extents(&rec->orphan_extents);
940         free_file_extent_holes(&rec->holes);
941         free(rec);
942 }
943
944 static int can_free_inode_rec(struct inode_record *rec)
945 {
946         if (!rec->errors && rec->checked && rec->found_inode_item &&
947             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
948                 return 1;
949         return 0;
950 }
951
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953                                  struct inode_record *rec)
954 {
955         struct cache_extent *cache;
956         struct inode_backref *tmp, *backref;
957         struct ptr_node *node;
958         u8 filetype;
959
960         if (!rec->found_inode_item)
961                 return;
962
963         filetype = imode_to_type(rec->imode);
964         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965                 if (backref->found_dir_item && backref->found_dir_index) {
966                         if (backref->filetype != filetype)
967                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968                         if (!backref->errors && backref->found_inode_ref &&
969                             rec->nlink == rec->found_link) {
970                                 list_del(&backref->list);
971                                 free(backref);
972                         }
973                 }
974         }
975
976         if (!rec->checked || rec->merging)
977                 return;
978
979         if (S_ISDIR(rec->imode)) {
980                 if (rec->found_size != rec->isize)
981                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982                 if (rec->found_file_extent)
983                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
984         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985                 if (rec->found_dir_item)
986                         rec->errors |= I_ERR_ODD_DIR_ITEM;
987                 if (rec->found_size != rec->nbytes)
988                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989                 if (rec->nlink > 0 && !no_holes &&
990                     (rec->extent_end < rec->isize ||
991                      first_extent_gap(&rec->holes) < rec->isize))
992                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
993         }
994
995         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996                 if (rec->found_csum_item && rec->nodatasum)
997                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
998                 if (rec->some_csum_missing && !rec->nodatasum)
999                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1000         }
1001
1002         BUG_ON(rec->refs != 1);
1003         if (can_free_inode_rec(rec)) {
1004                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005                 node = container_of(cache, struct ptr_node, cache);
1006                 BUG_ON(node->data != rec);
1007                 remove_cache_extent(inode_cache, &node->cache);
1008                 free(node);
1009                 free_inode_rec(rec);
1010         }
1011 }
1012
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1014 {
1015         struct btrfs_path path;
1016         struct btrfs_key key;
1017         int ret;
1018
1019         key.objectid = BTRFS_ORPHAN_OBJECTID;
1020         key.type = BTRFS_ORPHAN_ITEM_KEY;
1021         key.offset = ino;
1022
1023         btrfs_init_path(&path);
1024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025         btrfs_release_path(&path);
1026         if (ret > 0)
1027                 ret = -ENOENT;
1028         return ret;
1029 }
1030
1031 static int process_inode_item(struct extent_buffer *eb,
1032                               int slot, struct btrfs_key *key,
1033                               struct shared_node *active_node)
1034 {
1035         struct inode_record *rec;
1036         struct btrfs_inode_item *item;
1037
1038         rec = active_node->current;
1039         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040         if (rec->found_inode_item) {
1041                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1042                 return 1;
1043         }
1044         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045         rec->nlink = btrfs_inode_nlink(eb, item);
1046         rec->isize = btrfs_inode_size(eb, item);
1047         rec->nbytes = btrfs_inode_nbytes(eb, item);
1048         rec->imode = btrfs_inode_mode(eb, item);
1049         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1050                 rec->nodatasum = 1;
1051         rec->found_inode_item = 1;
1052         if (rec->nlink == 0)
1053                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054         maybe_free_inode_rec(&active_node->inode_cache, rec);
1055         return 0;
1056 }
1057
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1059                                                 const char *name,
1060                                                 int namelen, u64 dir)
1061 {
1062         struct inode_backref *backref;
1063
1064         list_for_each_entry(backref, &rec->backrefs, list) {
1065                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1066                         break;
1067                 if (backref->dir != dir || backref->namelen != namelen)
1068                         continue;
1069                 if (memcmp(name, backref->name, namelen))
1070                         continue;
1071                 return backref;
1072         }
1073
1074         backref = malloc(sizeof(*backref) + namelen + 1);
1075         if (!backref)
1076                 return NULL;
1077         memset(backref, 0, sizeof(*backref));
1078         backref->dir = dir;
1079         backref->namelen = namelen;
1080         memcpy(backref->name, name, namelen);
1081         backref->name[namelen] = '\0';
1082         list_add_tail(&backref->list, &rec->backrefs);
1083         return backref;
1084 }
1085
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087                              u64 ino, u64 dir, u64 index,
1088                              const char *name, int namelen,
1089                              u8 filetype, u8 itemtype, int errors)
1090 {
1091         struct inode_record *rec;
1092         struct inode_backref *backref;
1093
1094         rec = get_inode_rec(inode_cache, ino, 1);
1095         BUG_ON(IS_ERR(rec));
1096         backref = get_inode_backref(rec, name, namelen, dir);
1097         BUG_ON(!backref);
1098         if (errors)
1099                 backref->errors |= errors;
1100         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101                 if (backref->found_dir_index)
1102                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103                 if (backref->found_inode_ref && backref->index != index)
1104                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1105                 if (backref->found_dir_item && backref->filetype != filetype)
1106                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1107
1108                 backref->index = index;
1109                 backref->filetype = filetype;
1110                 backref->found_dir_index = 1;
1111         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1112                 rec->found_link++;
1113                 if (backref->found_dir_item)
1114                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115                 if (backref->found_dir_index && backref->filetype != filetype)
1116                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1117
1118                 backref->filetype = filetype;
1119                 backref->found_dir_item = 1;
1120         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122                 if (backref->found_inode_ref)
1123                         backref->errors |= REF_ERR_DUP_INODE_REF;
1124                 if (backref->found_dir_index && backref->index != index)
1125                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1126                 else
1127                         backref->index = index;
1128
1129                 backref->ref_type = itemtype;
1130                 backref->found_inode_ref = 1;
1131         } else {
1132                 BUG_ON(1);
1133         }
1134
1135         maybe_free_inode_rec(inode_cache, rec);
1136         return 0;
1137 }
1138
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140                             struct cache_tree *dst_cache)
1141 {
1142         struct inode_backref *backref;
1143         u32 dir_count = 0;
1144         int ret = 0;
1145
1146         dst->merging = 1;
1147         list_for_each_entry(backref, &src->backrefs, list) {
1148                 if (backref->found_dir_index) {
1149                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1150                                         backref->index, backref->name,
1151                                         backref->namelen, backref->filetype,
1152                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1153                 }
1154                 if (backref->found_dir_item) {
1155                         dir_count++;
1156                         add_inode_backref(dst_cache, dst->ino,
1157                                         backref->dir, 0, backref->name,
1158                                         backref->namelen, backref->filetype,
1159                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1160                 }
1161                 if (backref->found_inode_ref) {
1162                         add_inode_backref(dst_cache, dst->ino,
1163                                         backref->dir, backref->index,
1164                                         backref->name, backref->namelen, 0,
1165                                         backref->ref_type, backref->errors);
1166                 }
1167         }
1168
1169         if (src->found_dir_item)
1170                 dst->found_dir_item = 1;
1171         if (src->found_file_extent)
1172                 dst->found_file_extent = 1;
1173         if (src->found_csum_item)
1174                 dst->found_csum_item = 1;
1175         if (src->some_csum_missing)
1176                 dst->some_csum_missing = 1;
1177         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1179                 if (ret < 0)
1180                         return ret;
1181         }
1182
1183         BUG_ON(src->found_link < dir_count);
1184         dst->found_link += src->found_link - dir_count;
1185         dst->found_size += src->found_size;
1186         if (src->extent_start != (u64)-1) {
1187                 if (dst->extent_start == (u64)-1) {
1188                         dst->extent_start = src->extent_start;
1189                         dst->extent_end = src->extent_end;
1190                 } else {
1191                         if (dst->extent_end > src->extent_start)
1192                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193                         else if (dst->extent_end < src->extent_start) {
1194                                 ret = add_file_extent_hole(&dst->holes,
1195                                         dst->extent_end,
1196                                         src->extent_start - dst->extent_end);
1197                         }
1198                         if (dst->extent_end < src->extent_end)
1199                                 dst->extent_end = src->extent_end;
1200                 }
1201         }
1202
1203         dst->errors |= src->errors;
1204         if (src->found_inode_item) {
1205                 if (!dst->found_inode_item) {
1206                         dst->nlink = src->nlink;
1207                         dst->isize = src->isize;
1208                         dst->nbytes = src->nbytes;
1209                         dst->imode = src->imode;
1210                         dst->nodatasum = src->nodatasum;
1211                         dst->found_inode_item = 1;
1212                 } else {
1213                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1214                 }
1215         }
1216         dst->merging = 0;
1217
1218         return 0;
1219 }
1220
1221 static int splice_shared_node(struct shared_node *src_node,
1222                               struct shared_node *dst_node)
1223 {
1224         struct cache_extent *cache;
1225         struct ptr_node *node, *ins;
1226         struct cache_tree *src, *dst;
1227         struct inode_record *rec, *conflict;
1228         u64 current_ino = 0;
1229         int splice = 0;
1230         int ret;
1231
1232         if (--src_node->refs == 0)
1233                 splice = 1;
1234         if (src_node->current)
1235                 current_ino = src_node->current->ino;
1236
1237         src = &src_node->root_cache;
1238         dst = &dst_node->root_cache;
1239 again:
1240         cache = search_cache_extent(src, 0);
1241         while (cache) {
1242                 node = container_of(cache, struct ptr_node, cache);
1243                 rec = node->data;
1244                 cache = next_cache_extent(cache);
1245
1246                 if (splice) {
1247                         remove_cache_extent(src, &node->cache);
1248                         ins = node;
1249                 } else {
1250                         ins = malloc(sizeof(*ins));
1251                         BUG_ON(!ins);
1252                         ins->cache.start = node->cache.start;
1253                         ins->cache.size = node->cache.size;
1254                         ins->data = rec;
1255                         rec->refs++;
1256                 }
1257                 ret = insert_cache_extent(dst, &ins->cache);
1258                 if (ret == -EEXIST) {
1259                         conflict = get_inode_rec(dst, rec->ino, 1);
1260                         BUG_ON(IS_ERR(conflict));
1261                         merge_inode_recs(rec, conflict, dst);
1262                         if (rec->checked) {
1263                                 conflict->checked = 1;
1264                                 if (dst_node->current == conflict)
1265                                         dst_node->current = NULL;
1266                         }
1267                         maybe_free_inode_rec(dst, conflict);
1268                         free_inode_rec(rec);
1269                         free(ins);
1270                 } else {
1271                         BUG_ON(ret);
1272                 }
1273         }
1274
1275         if (src == &src_node->root_cache) {
1276                 src = &src_node->inode_cache;
1277                 dst = &dst_node->inode_cache;
1278                 goto again;
1279         }
1280
1281         if (current_ino > 0 && (!dst_node->current ||
1282             current_ino > dst_node->current->ino)) {
1283                 if (dst_node->current) {
1284                         dst_node->current->checked = 1;
1285                         maybe_free_inode_rec(dst, dst_node->current);
1286                 }
1287                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288                 BUG_ON(IS_ERR(dst_node->current));
1289         }
1290         return 0;
1291 }
1292
1293 static void free_inode_ptr(struct cache_extent *cache)
1294 {
1295         struct ptr_node *node;
1296         struct inode_record *rec;
1297
1298         node = container_of(cache, struct ptr_node, cache);
1299         rec = node->data;
1300         free_inode_rec(rec);
1301         free(node);
1302 }
1303
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1305
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1307                                             u64 bytenr)
1308 {
1309         struct cache_extent *cache;
1310         struct shared_node *node;
1311
1312         cache = lookup_cache_extent(shared, bytenr, 1);
1313         if (cache) {
1314                 node = container_of(cache, struct shared_node, cache);
1315                 return node;
1316         }
1317         return NULL;
1318 }
1319
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1321 {
1322         int ret;
1323         struct shared_node *node;
1324
1325         node = calloc(1, sizeof(*node));
1326         if (!node)
1327                 return -ENOMEM;
1328         node->cache.start = bytenr;
1329         node->cache.size = 1;
1330         cache_tree_init(&node->root_cache);
1331         cache_tree_init(&node->inode_cache);
1332         node->refs = refs;
1333
1334         ret = insert_cache_extent(shared, &node->cache);
1335
1336         return ret;
1337 }
1338
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340                              struct walk_control *wc, int level)
1341 {
1342         struct shared_node *node;
1343         struct shared_node *dest;
1344         int ret;
1345
1346         if (level == wc->active_node)
1347                 return 0;
1348
1349         BUG_ON(wc->active_node <= level);
1350         node = find_shared_node(&wc->shared, bytenr);
1351         if (!node) {
1352                 ret = add_shared_node(&wc->shared, bytenr, refs);
1353                 BUG_ON(ret);
1354                 node = find_shared_node(&wc->shared, bytenr);
1355                 wc->nodes[level] = node;
1356                 wc->active_node = level;
1357                 return 0;
1358         }
1359
1360         if (wc->root_level == wc->active_node &&
1361             btrfs_root_refs(&root->root_item) == 0) {
1362                 if (--node->refs == 0) {
1363                         free_inode_recs_tree(&node->root_cache);
1364                         free_inode_recs_tree(&node->inode_cache);
1365                         remove_cache_extent(&wc->shared, &node->cache);
1366                         free(node);
1367                 }
1368                 return 1;
1369         }
1370
1371         dest = wc->nodes[wc->active_node];
1372         splice_shared_node(node, dest);
1373         if (node->refs == 0) {
1374                 remove_cache_extent(&wc->shared, &node->cache);
1375                 free(node);
1376         }
1377         return 1;
1378 }
1379
1380 static int leave_shared_node(struct btrfs_root *root,
1381                              struct walk_control *wc, int level)
1382 {
1383         struct shared_node *node;
1384         struct shared_node *dest;
1385         int i;
1386
1387         if (level == wc->root_level)
1388                 return 0;
1389
1390         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1391                 if (wc->nodes[i])
1392                         break;
1393         }
1394         BUG_ON(i >= BTRFS_MAX_LEVEL);
1395
1396         node = wc->nodes[wc->active_node];
1397         wc->nodes[wc->active_node] = NULL;
1398         wc->active_node = i;
1399
1400         dest = wc->nodes[wc->active_node];
1401         if (wc->active_node < wc->root_level ||
1402             btrfs_root_refs(&root->root_item) > 0) {
1403                 BUG_ON(node->refs <= 1);
1404                 splice_shared_node(node, dest);
1405         } else {
1406                 BUG_ON(node->refs < 2);
1407                 node->refs--;
1408         }
1409         return 0;
1410 }
1411
1412 /*
1413  * Returns:
1414  * < 0 - on error
1415  * 1   - if the root with id child_root_id is a child of root parent_root_id
1416  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1417  *       has other root(s) as parent(s)
1418  * 2   - if the root child_root_id doesn't have any parent roots
1419  */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1421                          u64 child_root_id)
1422 {
1423         struct btrfs_path path;
1424         struct btrfs_key key;
1425         struct extent_buffer *leaf;
1426         int has_parent = 0;
1427         int ret;
1428
1429         btrfs_init_path(&path);
1430
1431         key.objectid = parent_root_id;
1432         key.type = BTRFS_ROOT_REF_KEY;
1433         key.offset = child_root_id;
1434         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1435                                 0, 0);
1436         if (ret < 0)
1437                 return ret;
1438         btrfs_release_path(&path);
1439         if (!ret)
1440                 return 1;
1441
1442         key.objectid = child_root_id;
1443         key.type = BTRFS_ROOT_BACKREF_KEY;
1444         key.offset = 0;
1445         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1446                                 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449
1450         while (1) {
1451                 leaf = path.nodes[0];
1452                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1454                         if (ret)
1455                                 break;
1456                         leaf = path.nodes[0];
1457                 }
1458
1459                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460                 if (key.objectid != child_root_id ||
1461                     key.type != BTRFS_ROOT_BACKREF_KEY)
1462                         break;
1463
1464                 has_parent = 1;
1465
1466                 if (key.offset == parent_root_id) {
1467                         btrfs_release_path(&path);
1468                         return 1;
1469                 }
1470
1471                 path.slots[0]++;
1472         }
1473 out:
1474         btrfs_release_path(&path);
1475         if (ret < 0)
1476                 return ret;
1477         return has_parent ? 0 : 2;
1478 }
1479
1480 static int process_dir_item(struct extent_buffer *eb,
1481                             int slot, struct btrfs_key *key,
1482                             struct shared_node *active_node)
1483 {
1484         u32 total;
1485         u32 cur = 0;
1486         u32 len;
1487         u32 name_len;
1488         u32 data_len;
1489         int error;
1490         int nritems = 0;
1491         u8 filetype;
1492         struct btrfs_dir_item *di;
1493         struct inode_record *rec;
1494         struct cache_tree *root_cache;
1495         struct cache_tree *inode_cache;
1496         struct btrfs_key location;
1497         char namebuf[BTRFS_NAME_LEN];
1498
1499         root_cache = &active_node->root_cache;
1500         inode_cache = &active_node->inode_cache;
1501         rec = active_node->current;
1502         rec->found_dir_item = 1;
1503
1504         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1505         total = btrfs_item_size_nr(eb, slot);
1506         while (cur < total) {
1507                 nritems++;
1508                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1509                 name_len = btrfs_dir_name_len(eb, di);
1510                 data_len = btrfs_dir_data_len(eb, di);
1511                 filetype = btrfs_dir_type(eb, di);
1512
1513                 rec->found_size += name_len;
1514                 if (cur + sizeof(*di) + name_len > total ||
1515                     name_len > BTRFS_NAME_LEN) {
1516                         error = REF_ERR_NAME_TOO_LONG;
1517
1518                         if (cur + sizeof(*di) > total)
1519                                 break;
1520                         len = min_t(u32, total - cur - sizeof(*di),
1521                                     BTRFS_NAME_LEN);
1522                 } else {
1523                         len = name_len;
1524                         error = 0;
1525                 }
1526
1527                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1528
1529                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1530                     key->offset != btrfs_name_hash(namebuf, len)) {
1531                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1532                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1533                         key->objectid, key->offset, namebuf, len, filetype,
1534                         key->offset, btrfs_name_hash(namebuf, len));
1535                 }
1536
1537                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1538                         add_inode_backref(inode_cache, location.objectid,
1539                                           key->objectid, key->offset, namebuf,
1540                                           len, filetype, key->type, error);
1541                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1542                         add_inode_backref(root_cache, location.objectid,
1543                                           key->objectid, key->offset,
1544                                           namebuf, len, filetype,
1545                                           key->type, error);
1546                 } else {
1547                         fprintf(stderr, "invalid location in dir item %u\n",
1548                                 location.type);
1549                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1550                                           key->objectid, key->offset, namebuf,
1551                                           len, filetype, key->type, error);
1552                 }
1553
1554                 len = sizeof(*di) + name_len + data_len;
1555                 di = (struct btrfs_dir_item *)((char *)di + len);
1556                 cur += len;
1557         }
1558         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1559                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1560
1561         return 0;
1562 }
1563
1564 static int process_inode_ref(struct extent_buffer *eb,
1565                              int slot, struct btrfs_key *key,
1566                              struct shared_node *active_node)
1567 {
1568         u32 total;
1569         u32 cur = 0;
1570         u32 len;
1571         u32 name_len;
1572         u64 index;
1573         int error;
1574         struct cache_tree *inode_cache;
1575         struct btrfs_inode_ref *ref;
1576         char namebuf[BTRFS_NAME_LEN];
1577
1578         inode_cache = &active_node->inode_cache;
1579
1580         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1581         total = btrfs_item_size_nr(eb, slot);
1582         while (cur < total) {
1583                 name_len = btrfs_inode_ref_name_len(eb, ref);
1584                 index = btrfs_inode_ref_index(eb, ref);
1585
1586                 /* inode_ref + namelen should not cross item boundary */
1587                 if (cur + sizeof(*ref) + name_len > total ||
1588                     name_len > BTRFS_NAME_LEN) {
1589                         if (total < cur + sizeof(*ref))
1590                                 break;
1591
1592                         /* Still try to read out the remaining part */
1593                         len = min_t(u32, total - cur - sizeof(*ref),
1594                                     BTRFS_NAME_LEN);
1595                         error = REF_ERR_NAME_TOO_LONG;
1596                 } else {
1597                         len = name_len;
1598                         error = 0;
1599                 }
1600
1601                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1602                 add_inode_backref(inode_cache, key->objectid, key->offset,
1603                                   index, namebuf, len, 0, key->type, error);
1604
1605                 len = sizeof(*ref) + name_len;
1606                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1607                 cur += len;
1608         }
1609         return 0;
1610 }
1611
1612 static int process_inode_extref(struct extent_buffer *eb,
1613                                 int slot, struct btrfs_key *key,
1614                                 struct shared_node *active_node)
1615 {
1616         u32 total;
1617         u32 cur = 0;
1618         u32 len;
1619         u32 name_len;
1620         u64 index;
1621         u64 parent;
1622         int error;
1623         struct cache_tree *inode_cache;
1624         struct btrfs_inode_extref *extref;
1625         char namebuf[BTRFS_NAME_LEN];
1626
1627         inode_cache = &active_node->inode_cache;
1628
1629         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1630         total = btrfs_item_size_nr(eb, slot);
1631         while (cur < total) {
1632                 name_len = btrfs_inode_extref_name_len(eb, extref);
1633                 index = btrfs_inode_extref_index(eb, extref);
1634                 parent = btrfs_inode_extref_parent(eb, extref);
1635                 if (name_len <= BTRFS_NAME_LEN) {
1636                         len = name_len;
1637                         error = 0;
1638                 } else {
1639                         len = BTRFS_NAME_LEN;
1640                         error = REF_ERR_NAME_TOO_LONG;
1641                 }
1642                 read_extent_buffer(eb, namebuf,
1643                                    (unsigned long)(extref + 1), len);
1644                 add_inode_backref(inode_cache, key->objectid, parent,
1645                                   index, namebuf, len, 0, key->type, error);
1646
1647                 len = sizeof(*extref) + name_len;
1648                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1649                 cur += len;
1650         }
1651         return 0;
1652
1653 }
1654
1655 static int count_csum_range(struct btrfs_root *root, u64 start,
1656                             u64 len, u64 *found)
1657 {
1658         struct btrfs_key key;
1659         struct btrfs_path path;
1660         struct extent_buffer *leaf;
1661         int ret;
1662         size_t size;
1663         *found = 0;
1664         u64 csum_end;
1665         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1666
1667         btrfs_init_path(&path);
1668
1669         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1670         key.offset = start;
1671         key.type = BTRFS_EXTENT_CSUM_KEY;
1672
1673         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1674                                 &key, &path, 0, 0);
1675         if (ret < 0)
1676                 goto out;
1677         if (ret > 0 && path.slots[0] > 0) {
1678                 leaf = path.nodes[0];
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1680                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1681                     key.type == BTRFS_EXTENT_CSUM_KEY)
1682                         path.slots[0]--;
1683         }
1684
1685         while (len > 0) {
1686                 leaf = path.nodes[0];
1687                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1688                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1689                         if (ret > 0)
1690                                 break;
1691                         else if (ret < 0)
1692                                 goto out;
1693                         leaf = path.nodes[0];
1694                 }
1695
1696                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1697                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1698                     key.type != BTRFS_EXTENT_CSUM_KEY)
1699                         break;
1700
1701                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1702                 if (key.offset >= start + len)
1703                         break;
1704
1705                 if (key.offset > start)
1706                         start = key.offset;
1707
1708                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1709                 csum_end = key.offset + (size / csum_size) *
1710                            root->fs_info->sectorsize;
1711                 if (csum_end > start) {
1712                         size = min(csum_end - start, len);
1713                         len -= size;
1714                         start += size;
1715                         *found += size;
1716                 }
1717
1718                 path.slots[0]++;
1719         }
1720 out:
1721         btrfs_release_path(&path);
1722         if (ret < 0)
1723                 return ret;
1724         return 0;
1725 }
1726
1727 static int process_file_extent(struct btrfs_root *root,
1728                                 struct extent_buffer *eb,
1729                                 int slot, struct btrfs_key *key,
1730                                 struct shared_node *active_node)
1731 {
1732         struct inode_record *rec;
1733         struct btrfs_file_extent_item *fi;
1734         u64 num_bytes = 0;
1735         u64 disk_bytenr = 0;
1736         u64 extent_offset = 0;
1737         u64 mask = root->fs_info->sectorsize - 1;
1738         int extent_type;
1739         int ret;
1740
1741         rec = active_node->current;
1742         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743         rec->found_file_extent = 1;
1744
1745         if (rec->extent_start == (u64)-1) {
1746                 rec->extent_start = key->offset;
1747                 rec->extent_end = key->offset;
1748         }
1749
1750         if (rec->extent_end > key->offset)
1751                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752         else if (rec->extent_end < key->offset) {
1753                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754                                            key->offset - rec->extent_end);
1755                 if (ret < 0)
1756                         return ret;
1757         }
1758
1759         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760         extent_type = btrfs_file_extent_type(eb, fi);
1761
1762         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1764                 if (num_bytes == 0)
1765                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766                 rec->found_size += num_bytes;
1767                 num_bytes = (num_bytes + mask) & ~mask;
1768         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772                 extent_offset = btrfs_file_extent_offset(eb, fi);
1773                 if (num_bytes == 0 || (num_bytes & mask))
1774                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775                 if (num_bytes + extent_offset >
1776                     btrfs_file_extent_ram_bytes(eb, fi))
1777                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779                     (btrfs_file_extent_compression(eb, fi) ||
1780                      btrfs_file_extent_encryption(eb, fi) ||
1781                      btrfs_file_extent_other_encoding(eb, fi)))
1782                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1783                 if (disk_bytenr > 0)
1784                         rec->found_size += num_bytes;
1785         } else {
1786                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1787         }
1788         rec->extent_end = key->offset + num_bytes;
1789
1790         /*
1791          * The data reloc tree will copy full extents into its inode and then
1792          * copy the corresponding csums.  Because the extent it copied could be
1793          * a preallocated extent that hasn't been written to yet there may be no
1794          * csums to copy, ergo we won't have csums for our file extent.  This is
1795          * ok so just don't bother checking csums if the inode belongs to the
1796          * data reloc tree.
1797          */
1798         if (disk_bytenr > 0 &&
1799             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1800                 u64 found;
1801                 if (btrfs_file_extent_compression(eb, fi))
1802                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1803                 else
1804                         disk_bytenr += extent_offset;
1805
1806                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1807                 if (ret < 0)
1808                         return ret;
1809                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1810                         if (found > 0)
1811                                 rec->found_csum_item = 1;
1812                         if (found < num_bytes)
1813                                 rec->some_csum_missing = 1;
1814                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1815                         if (found > 0)
1816                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1817                 }
1818         }
1819         return 0;
1820 }
1821
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823                             struct walk_control *wc)
1824 {
1825         struct btrfs_key key;
1826         u32 nritems;
1827         int i;
1828         int ret = 0;
1829         struct cache_tree *inode_cache;
1830         struct shared_node *active_node;
1831
1832         if (wc->root_level == wc->active_node &&
1833             btrfs_root_refs(&root->root_item) == 0)
1834                 return 0;
1835
1836         active_node = wc->nodes[wc->active_node];
1837         inode_cache = &active_node->inode_cache;
1838         nritems = btrfs_header_nritems(eb);
1839         for (i = 0; i < nritems; i++) {
1840                 btrfs_item_key_to_cpu(eb, &key, i);
1841
1842                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1843                         continue;
1844                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1845                         continue;
1846
1847                 if (active_node->current == NULL ||
1848                     active_node->current->ino < key.objectid) {
1849                         if (active_node->current) {
1850                                 active_node->current->checked = 1;
1851                                 maybe_free_inode_rec(inode_cache,
1852                                                      active_node->current);
1853                         }
1854                         active_node->current = get_inode_rec(inode_cache,
1855                                                              key.objectid, 1);
1856                         BUG_ON(IS_ERR(active_node->current));
1857                 }
1858                 switch (key.type) {
1859                 case BTRFS_DIR_ITEM_KEY:
1860                 case BTRFS_DIR_INDEX_KEY:
1861                         ret = process_dir_item(eb, i, &key, active_node);
1862                         break;
1863                 case BTRFS_INODE_REF_KEY:
1864                         ret = process_inode_ref(eb, i, &key, active_node);
1865                         break;
1866                 case BTRFS_INODE_EXTREF_KEY:
1867                         ret = process_inode_extref(eb, i, &key, active_node);
1868                         break;
1869                 case BTRFS_INODE_ITEM_KEY:
1870                         ret = process_inode_item(eb, i, &key, active_node);
1871                         break;
1872                 case BTRFS_EXTENT_DATA_KEY:
1873                         ret = process_file_extent(root, eb, i, &key,
1874                                                   active_node);
1875                         break;
1876                 default:
1877                         break;
1878                 };
1879         }
1880         return ret;
1881 }
1882
/*
 * Per-level cache of the most recently looked-up tree block, indexed by
 * tree level.  Filled in by update_nodes_refs() and walk_down_tree().
 */
struct node_refs {
        /* start bytenr of the block the cached values at this level describe */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count reported by btrfs_lookup_extent_info() for it */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero when the block has to be checked from the current root */
        int need_check[BTRFS_MAX_LEVEL];
};
1888
1889 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1890                              struct node_refs *nrefs, u64 level);
1891 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1892                             unsigned int ext_ref);
1893
1894 /*
1895  * Returns >0  Found error, not fatal, should continue
1896  * Returns <0  Fatal error, must exit the whole check
1897  * Returns 0   No errors found
1898  */
1899 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1900                                struct node_refs *nrefs, int *level, int ext_ref)
1901 {
1902         struct extent_buffer *cur = path->nodes[0];
1903         struct btrfs_key key;
1904         u64 cur_bytenr;
1905         u32 nritems;
1906         u64 first_ino = 0;
1907         int root_level = btrfs_header_level(root->node);
1908         int i;
1909         int ret = 0; /* Final return value */
1910         int err = 0; /* Positive error bitmap */
1911
1912         cur_bytenr = cur->start;
1913
1914         /* skip to first inode item or the first inode number change */
1915         nritems = btrfs_header_nritems(cur);
1916         for (i = 0; i < nritems; i++) {
1917                 btrfs_item_key_to_cpu(cur, &key, i);
1918                 if (i == 0)
1919                         first_ino = key.objectid;
1920                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1921                     (first_ino && first_ino != key.objectid))
1922                         break;
1923         }
1924         if (i == nritems) {
1925                 path->slots[0] = nritems;
1926                 return 0;
1927         }
1928         path->slots[0] = i;
1929
1930 again:
1931         err |= check_inode_item(root, path, ext_ref);
1932
1933         if (err & LAST_ITEM)
1934                 goto out;
1935
1936         /* still have inode items in thie leaf */
1937         if (cur->start == cur_bytenr)
1938                 goto again;
1939
1940         /*
1941          * we have switched to another leaf, above nodes may
1942          * have changed, here walk down the path, if a node
1943          * or leaf is shared, check whether we can skip this
1944          * node or leaf.
1945          */
1946         for (i = root_level; i >= 0; i--) {
1947                 if (path->nodes[i]->start == nrefs->bytenr[i])
1948                         continue;
1949
1950                 ret = update_nodes_refs(root,
1951                                 path->nodes[i]->start,
1952                                 nrefs, i);
1953                 if (ret)
1954                         goto out;
1955
1956                 if (!nrefs->need_check[i]) {
1957                         *level += 1;
1958                         break;
1959                 }
1960         }
1961
1962         for (i = 0; i < *level; i++) {
1963                 free_extent_buffer(path->nodes[i]);
1964                 path->nodes[i] = NULL;
1965         }
1966 out:
1967         err &= ~LAST_ITEM;
1968         if (err && !ret)
1969                 ret = err;
1970         return ret;
1971 }
1972
1973 static void reada_walk_down(struct btrfs_root *root,
1974                             struct extent_buffer *node, int slot)
1975 {
1976         struct btrfs_fs_info *fs_info = root->fs_info;
1977         u64 bytenr;
1978         u64 ptr_gen;
1979         u32 nritems;
1980         int i;
1981         int level;
1982
1983         level = btrfs_header_level(node);
1984         if (level != 1)
1985                 return;
1986
1987         nritems = btrfs_header_nritems(node);
1988         for (i = slot; i < nritems; i++) {
1989                 bytenr = btrfs_node_blockptr(node, i);
1990                 ptr_gen = btrfs_node_ptr_generation(node, i);
1991                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1992         }
1993 }
1994
1995 /*
1996  * Check the child node/leaf by the following condition:
1997  * 1. the first item key of the node/leaf should be the same with the one
1998  *    in parent.
1999  * 2. block in parent node should match the child node/leaf.
2000  * 3. generation of parent node and child's header should be consistent.
2001  *
2002  * Or the child node/leaf pointed by the key in parent is not valid.
2003  *
2004  * We hope to check leaf owner too, but since subvol may share leaves,
2005  * which makes leaf owner check not so strong, key check should be
2006  * sufficient enough for that case.
2007  */
2008 static int check_child_node(struct extent_buffer *parent, int slot,
2009                             struct extent_buffer *child)
2010 {
2011         struct btrfs_key parent_key;
2012         struct btrfs_key child_key;
2013         int ret = 0;
2014
2015         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2016         if (btrfs_header_level(child) == 0)
2017                 btrfs_item_key_to_cpu(child, &child_key, 0);
2018         else
2019                 btrfs_node_key_to_cpu(child, &child_key, 0);
2020
2021         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2022                 ret = -EINVAL;
2023                 fprintf(stderr,
2024                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2025                         parent_key.objectid, parent_key.type, parent_key.offset,
2026                         child_key.objectid, child_key.type, child_key.offset);
2027         }
2028         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2029                 ret = -EINVAL;
2030                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2031                         btrfs_node_blockptr(parent, slot),
2032                         btrfs_header_bytenr(child));
2033         }
2034         if (btrfs_node_ptr_generation(parent, slot) !=
2035             btrfs_header_generation(child)) {
2036                 ret = -EINVAL;
2037                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2038                         btrfs_header_generation(child),
2039                         btrfs_node_ptr_generation(parent, slot));
2040         }
2041         return ret;
2042 }
2043
2044 /*
2045  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2046  * in every fs or file tree check. Here we find its all root ids, and only check
2047  * it in the fs or file tree which has the smallest root id.
2048  */
2049 static int need_check(struct btrfs_root *root, struct ulist *roots)
2050 {
2051         struct rb_node *node;
2052         struct ulist_node *u;
2053
2054         if (roots->nnodes == 1)
2055                 return 1;
2056
2057         node = rb_first(&roots->root);
2058         u = rb_entry(node, struct ulist_node, rb_node);
2059         /*
2060          * current root id is not smallest, we skip it and let it be checked
2061          * in the fs or file tree who hash the smallest root id.
2062          */
2063         if (root->objectid != u->val)
2064                 return 0;
2065
2066         return 1;
2067 }
2068
2069 /*
2070  * for a tree node or leaf, we record its reference count, so later if we still
2071  * process this node or leaf, don't need to compute its reference count again.
2072  */
2073 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2074                              struct node_refs *nrefs, u64 level)
2075 {
2076         int check, ret;
2077         u64 refs;
2078         struct ulist *roots;
2079
2080         if (nrefs->bytenr[level] != bytenr) {
2081                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2082                                        level, 1, &refs, NULL);
2083                 if (ret < 0)
2084                         return ret;
2085
2086                 nrefs->bytenr[level] = bytenr;
2087                 nrefs->refs[level] = refs;
2088                 if (refs > 1) {
2089                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2090                                                    0, &roots);
2091                         if (ret)
2092                                 return -EIO;
2093
2094                         check = need_check(root, roots);
2095                         ulist_free(roots);
2096                         nrefs->need_check[level] = check;
2097                 } else {
2098                         nrefs->need_check[level] = 1;
2099                 }
2100         }
2101
2102         return 0;
2103 }
2104
2105 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2106                           struct walk_control *wc, int *level,
2107                           struct node_refs *nrefs)
2108 {
2109         enum btrfs_tree_block_status status;
2110         u64 bytenr;
2111         u64 ptr_gen;
2112         struct btrfs_fs_info *fs_info = root->fs_info;
2113         struct extent_buffer *next;
2114         struct extent_buffer *cur;
2115         int ret, err = 0;
2116         u64 refs;
2117
2118         WARN_ON(*level < 0);
2119         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2120
2121         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2122                 refs = nrefs->refs[*level];
2123                 ret = 0;
2124         } else {
2125                 ret = btrfs_lookup_extent_info(NULL, root,
2126                                        path->nodes[*level]->start,
2127                                        *level, 1, &refs, NULL);
2128                 if (ret < 0) {
2129                         err = ret;
2130                         goto out;
2131                 }
2132                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2133                 nrefs->refs[*level] = refs;
2134         }
2135
2136         if (refs > 1) {
2137                 ret = enter_shared_node(root, path->nodes[*level]->start,
2138                                         refs, wc, *level);
2139                 if (ret > 0) {
2140                         err = ret;
2141                         goto out;
2142                 }
2143         }
2144
2145         while (*level >= 0) {
2146                 WARN_ON(*level < 0);
2147                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2148                 cur = path->nodes[*level];
2149
2150                 if (btrfs_header_level(cur) != *level)
2151                         WARN_ON(1);
2152
2153                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2154                         break;
2155                 if (*level == 0) {
2156                         ret = process_one_leaf(root, cur, wc);
2157                         if (ret < 0)
2158                                 err = ret;
2159                         break;
2160                 }
2161                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2162                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2163
2164                 if (bytenr == nrefs->bytenr[*level - 1]) {
2165                         refs = nrefs->refs[*level - 1];
2166                 } else {
2167                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2168                                         *level - 1, 1, &refs, NULL);
2169                         if (ret < 0) {
2170                                 refs = 0;
2171                         } else {
2172                                 nrefs->bytenr[*level - 1] = bytenr;
2173                                 nrefs->refs[*level - 1] = refs;
2174                         }
2175                 }
2176
2177                 if (refs > 1) {
2178                         ret = enter_shared_node(root, bytenr, refs,
2179                                                 wc, *level - 1);
2180                         if (ret > 0) {
2181                                 path->slots[*level]++;
2182                                 continue;
2183                         }
2184                 }
2185
2186                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2187                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2188                         free_extent_buffer(next);
2189                         reada_walk_down(root, cur, path->slots[*level]);
2190                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2191                         if (!extent_buffer_uptodate(next)) {
2192                                 struct btrfs_key node_key;
2193
2194                                 btrfs_node_key_to_cpu(path->nodes[*level],
2195                                                       &node_key,
2196                                                       path->slots[*level]);
2197                                 btrfs_add_corrupt_extent_record(root->fs_info,
2198                                                 &node_key,
2199                                                 path->nodes[*level]->start,
2200                                                 root->fs_info->nodesize,
2201                                                 *level);
2202                                 err = -EIO;
2203                                 goto out;
2204                         }
2205                 }
2206
2207                 ret = check_child_node(cur, path->slots[*level], next);
2208                 if (ret) {
2209                         free_extent_buffer(next);
2210                         err = ret;
2211                         goto out;
2212                 }
2213
2214                 if (btrfs_is_leaf(next))
2215                         status = btrfs_check_leaf(root, NULL, next);
2216                 else
2217                         status = btrfs_check_node(root, NULL, next);
2218                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2219                         free_extent_buffer(next);
2220                         err = -EIO;
2221                         goto out;
2222                 }
2223
2224                 *level = *level - 1;
2225                 free_extent_buffer(path->nodes[*level]);
2226                 path->nodes[*level] = next;
2227                 path->slots[*level] = 0;
2228         }
2229 out:
2230         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2231         return err;
2232 }
2233
2234 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2235                             unsigned int ext_ref);
2236
2237 /*
2238  * Returns >0  Found error, should continue
2239  * Returns <0  Fatal error, must exit the whole check
2240  * Returns 0   No errors found
2241  */
2242 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2243                              int *level, struct node_refs *nrefs, int ext_ref)
2244 {
2245         enum btrfs_tree_block_status status;
2246         u64 bytenr;
2247         u64 ptr_gen;
2248         struct btrfs_fs_info *fs_info = root->fs_info;
2249         struct extent_buffer *next;
2250         struct extent_buffer *cur;
2251         int ret;
2252
2253         WARN_ON(*level < 0);
2254         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2255
2256         ret = update_nodes_refs(root, path->nodes[*level]->start,
2257                                 nrefs, *level);
2258         if (ret < 0)
2259                 return ret;
2260
2261         while (*level >= 0) {
2262                 WARN_ON(*level < 0);
2263                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2264                 cur = path->nodes[*level];
2265
2266                 if (btrfs_header_level(cur) != *level)
2267                         WARN_ON(1);
2268
2269                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2270                         break;
2271                 /* Don't forgot to check leaf/node validation */
2272                 if (*level == 0) {
2273                         ret = btrfs_check_leaf(root, NULL, cur);
2274                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2275                                 ret = -EIO;
2276                                 break;
2277                         }
2278                         ret = process_one_leaf_v2(root, path, nrefs,
2279                                                   level, ext_ref);
2280                         break;
2281                 } else {
2282                         ret = btrfs_check_node(root, NULL, cur);
2283                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2284                                 ret = -EIO;
2285                                 break;
2286                         }
2287                 }
2288                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2289                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2290
2291                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2292                 if (ret)
2293                         break;
2294                 if (!nrefs->need_check[*level - 1]) {
2295                         path->slots[*level]++;
2296                         continue;
2297                 }
2298
2299                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2300                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2301                         free_extent_buffer(next);
2302                         reada_walk_down(root, cur, path->slots[*level]);
2303                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2304                         if (!extent_buffer_uptodate(next)) {
2305                                 struct btrfs_key node_key;
2306
2307                                 btrfs_node_key_to_cpu(path->nodes[*level],
2308                                                       &node_key,
2309                                                       path->slots[*level]);
2310                                 btrfs_add_corrupt_extent_record(fs_info,
2311                                                 &node_key,
2312                                                 path->nodes[*level]->start,
2313                                                 fs_info->nodesize,
2314                                                 *level);
2315                                 ret = -EIO;
2316                                 break;
2317                         }
2318                 }
2319
2320                 ret = check_child_node(cur, path->slots[*level], next);
2321                 if (ret < 0) 
2322                         break;
2323
2324                 if (btrfs_is_leaf(next))
2325                         status = btrfs_check_leaf(root, NULL, next);
2326                 else
2327                         status = btrfs_check_node(root, NULL, next);
2328                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2329                         free_extent_buffer(next);
2330                         ret = -EIO;
2331                         break;
2332                 }
2333
2334                 *level = *level - 1;
2335                 free_extent_buffer(path->nodes[*level]);
2336                 path->nodes[*level] = next;
2337                 path->slots[*level] = 0;
2338         }
2339         return ret;
2340 }
2341
2342 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2343                         struct walk_control *wc, int *level)
2344 {
2345         int i;
2346         struct extent_buffer *leaf;
2347
2348         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349                 leaf = path->nodes[i];
2350                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2351                         path->slots[i]++;
2352                         *level = i;
2353                         return 0;
2354                 } else {
2355                         free_extent_buffer(path->nodes[*level]);
2356                         path->nodes[*level] = NULL;
2357                         BUG_ON(*level > wc->active_node);
2358                         if (*level == wc->active_node)
2359                                 leave_shared_node(root, wc, *level);
2360                         *level = i + 1;
2361                 }
2362         }
2363         return 1;
2364 }
2365
2366 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2367                            int *level)
2368 {
2369         int i;
2370         struct extent_buffer *leaf;
2371
2372         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2373                 leaf = path->nodes[i];
2374                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2375                         path->slots[i]++;
2376                         *level = i;
2377                         return 0;
2378                 } else {
2379                         free_extent_buffer(path->nodes[*level]);
2380                         path->nodes[*level] = NULL;
2381                         *level = i + 1;
2382                 }
2383         }
2384         return 1;
2385 }
2386
2387 static int check_root_dir(struct inode_record *rec)
2388 {
2389         struct inode_backref *backref;
2390         int ret = -1;
2391
2392         if (!rec->found_inode_item || rec->errors)
2393                 goto out;
2394         if (rec->nlink != 1 || rec->found_link != 0)
2395                 goto out;
2396         if (list_empty(&rec->backrefs))
2397                 goto out;
2398         backref = to_inode_backref(rec->backrefs.next);
2399         if (!backref->found_inode_ref)
2400                 goto out;
2401         if (backref->index != 0 || backref->namelen != 2 ||
2402             memcmp(backref->name, "..", 2))
2403                 goto out;
2404         if (backref->found_dir_index || backref->found_dir_item)
2405                 goto out;
2406         ret = 0;
2407 out:
2408         return ret;
2409 }
2410
2411 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2412                               struct btrfs_root *root, struct btrfs_path *path,
2413                               struct inode_record *rec)
2414 {
2415         struct btrfs_inode_item *ei;
2416         struct btrfs_key key;
2417         int ret;
2418
2419         key.objectid = rec->ino;
2420         key.type = BTRFS_INODE_ITEM_KEY;
2421         key.offset = (u64)-1;
2422
2423         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2424         if (ret < 0)
2425                 goto out;
2426         if (ret) {
2427                 if (!path->slots[0]) {
2428                         ret = -ENOENT;
2429                         goto out;
2430                 }
2431                 path->slots[0]--;
2432                 ret = 0;
2433         }
2434         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2435         if (key.objectid != rec->ino) {
2436                 ret = -ENOENT;
2437                 goto out;
2438         }
2439
2440         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2441                             struct btrfs_inode_item);
2442         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2443         btrfs_mark_buffer_dirty(path->nodes[0]);
2444         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2445         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2446                root->root_key.objectid);
2447 out:
2448         btrfs_release_path(path);
2449         return ret;
2450 }
2451
2452 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2453                                     struct btrfs_root *root,
2454                                     struct btrfs_path *path,
2455                                     struct inode_record *rec)
2456 {
2457         int ret;
2458
2459         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2460         btrfs_release_path(path);
2461         if (!ret)
2462                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2463         return ret;
2464 }
2465
2466 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2467                                struct btrfs_root *root,
2468                                struct btrfs_path *path,
2469                                struct inode_record *rec)
2470 {
2471         struct btrfs_inode_item *ei;
2472         struct btrfs_key key;
2473         int ret = 0;
2474
2475         key.objectid = rec->ino;
2476         key.type = BTRFS_INODE_ITEM_KEY;
2477         key.offset = 0;
2478
2479         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2480         if (ret) {
2481                 if (ret > 0)
2482                         ret = -ENOENT;
2483                 goto out;
2484         }
2485
2486         /* Since ret == 0, no need to check anything */
2487         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2488                             struct btrfs_inode_item);
2489         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2490         btrfs_mark_buffer_dirty(path->nodes[0]);
2491         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2492         printf("reset nbytes for ino %llu root %llu\n",
2493                rec->ino, root->root_key.objectid);
2494 out:
2495         btrfs_release_path(path);
2496         return ret;
2497 }
2498
2499 static int add_missing_dir_index(struct btrfs_root *root,
2500                                  struct cache_tree *inode_cache,
2501                                  struct inode_record *rec,
2502                                  struct inode_backref *backref)
2503 {
2504         struct btrfs_path path;
2505         struct btrfs_trans_handle *trans;
2506         struct btrfs_dir_item *dir_item;
2507         struct extent_buffer *leaf;
2508         struct btrfs_key key;
2509         struct btrfs_disk_key disk_key;
2510         struct inode_record *dir_rec;
2511         unsigned long name_ptr;
2512         u32 data_size = sizeof(*dir_item) + backref->namelen;
2513         int ret;
2514
2515         trans = btrfs_start_transaction(root, 1);
2516         if (IS_ERR(trans))
2517                 return PTR_ERR(trans);
2518
2519         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2520                 (unsigned long long)rec->ino);
2521
2522         btrfs_init_path(&path);
2523         key.objectid = backref->dir;
2524         key.type = BTRFS_DIR_INDEX_KEY;
2525         key.offset = backref->index;
2526         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2527         BUG_ON(ret);
2528
2529         leaf = path.nodes[0];
2530         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2531
2532         disk_key.objectid = cpu_to_le64(rec->ino);
2533         disk_key.type = BTRFS_INODE_ITEM_KEY;
2534         disk_key.offset = 0;
2535
2536         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2537         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2538         btrfs_set_dir_data_len(leaf, dir_item, 0);
2539         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2540         name_ptr = (unsigned long)(dir_item + 1);
2541         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2542         btrfs_mark_buffer_dirty(leaf);
2543         btrfs_release_path(&path);
2544         btrfs_commit_transaction(trans, root);
2545
2546         backref->found_dir_index = 1;
2547         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2548         BUG_ON(IS_ERR(dir_rec));
2549         if (!dir_rec)
2550                 return 0;
2551         dir_rec->found_size += backref->namelen;
2552         if (dir_rec->found_size == dir_rec->isize &&
2553             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2554                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2555         if (dir_rec->found_size != dir_rec->isize)
2556                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2557
2558         return 0;
2559 }
2560
2561 static int delete_dir_index(struct btrfs_root *root,
2562                             struct inode_backref *backref)
2563 {
2564         struct btrfs_trans_handle *trans;
2565         struct btrfs_dir_item *di;
2566         struct btrfs_path path;
2567         int ret = 0;
2568
2569         trans = btrfs_start_transaction(root, 1);
2570         if (IS_ERR(trans))
2571                 return PTR_ERR(trans);
2572
2573         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2574                 (unsigned long long)backref->dir,
2575                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2576                 (unsigned long long)root->objectid);
2577
2578         btrfs_init_path(&path);
2579         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2580                                     backref->name, backref->namelen,
2581                                     backref->index, -1);
2582         if (IS_ERR(di)) {
2583                 ret = PTR_ERR(di);
2584                 btrfs_release_path(&path);
2585                 btrfs_commit_transaction(trans, root);
2586                 if (ret == -ENOENT)
2587                         return 0;
2588                 return ret;
2589         }
2590
2591         if (!di)
2592                 ret = btrfs_del_item(trans, root, &path);
2593         else
2594                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2595         BUG_ON(ret);
2596         btrfs_release_path(&path);
2597         btrfs_commit_transaction(trans, root);
2598         return ret;
2599 }
2600
2601 static int create_inode_item(struct btrfs_root *root,
2602                              struct inode_record *rec,
2603                              int root_dir)
2604 {
2605         struct btrfs_trans_handle *trans;
2606         struct btrfs_inode_item inode_item;
2607         time_t now = time(NULL);
2608         int ret;
2609
2610         trans = btrfs_start_transaction(root, 1);
2611         if (IS_ERR(trans)) {
2612                 ret = PTR_ERR(trans);
2613                 return ret;
2614         }
2615
2616         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2617                 "be incomplete, please check permissions and content after "
2618                 "the fsck completes.\n", (unsigned long long)root->objectid,
2619                 (unsigned long long)rec->ino);
2620
2621         memset(&inode_item, 0, sizeof(inode_item));
2622         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2623         if (root_dir)
2624                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2625         else
2626                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2627         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2628         if (rec->found_dir_item) {
2629                 if (rec->found_file_extent)
2630                         fprintf(stderr, "root %llu inode %llu has both a dir "
2631                                 "item and extents, unsure if it is a dir or a "
2632                                 "regular file so setting it as a directory\n",
2633                                 (unsigned long long)root->objectid,
2634                                 (unsigned long long)rec->ino);
2635                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2636                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2637         } else if (!rec->found_dir_item) {
2638                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2639                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2640         }
2641         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2642         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2643         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2644         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2645         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2646         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2647         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2648         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2649
2650         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2651         BUG_ON(ret);
2652         btrfs_commit_transaction(trans, root);
2653         return 0;
2654 }
2655
2656 static int repair_inode_backrefs(struct btrfs_root *root,
2657                                  struct inode_record *rec,
2658                                  struct cache_tree *inode_cache,
2659                                  int delete)
2660 {
2661         struct inode_backref *tmp, *backref;
2662         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2663         int ret = 0;
2664         int repaired = 0;
2665
2666         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2667                 if (!delete && rec->ino == root_dirid) {
2668                         if (!rec->found_inode_item) {
2669                                 ret = create_inode_item(root, rec, 1);
2670                                 if (ret)
2671                                         break;
2672                                 repaired++;
2673                         }
2674                 }
2675
2676                 /* Index 0 for root dir's are special, don't mess with it */
2677                 if (rec->ino == root_dirid && backref->index == 0)
2678                         continue;
2679
2680                 if (delete &&
2681                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2682                      (backref->found_dir_index && backref->found_inode_ref &&
2683                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2684                         ret = delete_dir_index(root, backref);
2685                         if (ret)
2686                                 break;
2687                         repaired++;
2688                         list_del(&backref->list);
2689                         free(backref);
2690                         continue;
2691                 }
2692
2693                 if (!delete && !backref->found_dir_index &&
2694                     backref->found_dir_item && backref->found_inode_ref) {
2695                         ret = add_missing_dir_index(root, inode_cache, rec,
2696                                                     backref);
2697                         if (ret)
2698                                 break;
2699                         repaired++;
2700                         if (backref->found_dir_item &&
2701                             backref->found_dir_index) {
2702                                 if (!backref->errors &&
2703                                     backref->found_inode_ref) {
2704                                         list_del(&backref->list);
2705                                         free(backref);
2706                                         continue;
2707                                 }
2708                         }
2709                 }
2710
2711                 if (!delete && (!backref->found_dir_index &&
2712                                 !backref->found_dir_item &&
2713                                 backref->found_inode_ref)) {
2714                         struct btrfs_trans_handle *trans;
2715                         struct btrfs_key location;
2716
2717                         ret = check_dir_conflict(root, backref->name,
2718                                                  backref->namelen,
2719                                                  backref->dir,
2720                                                  backref->index);
2721                         if (ret) {
2722                                 /*
2723                                  * let nlink fixing routine to handle it,
2724                                  * which can do it better.
2725                                  */
2726                                 ret = 0;
2727                                 break;
2728                         }
2729                         location.objectid = rec->ino;
2730                         location.type = BTRFS_INODE_ITEM_KEY;
2731                         location.offset = 0;
2732
2733                         trans = btrfs_start_transaction(root, 1);
2734                         if (IS_ERR(trans)) {
2735                                 ret = PTR_ERR(trans);
2736                                 break;
2737                         }
2738                         fprintf(stderr, "adding missing dir index/item pair "
2739                                 "for inode %llu\n",
2740                                 (unsigned long long)rec->ino);
2741                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2742                                                     backref->namelen,
2743                                                     backref->dir, &location,
2744                                                     imode_to_type(rec->imode),
2745                                                     backref->index);
2746                         BUG_ON(ret);
2747                         btrfs_commit_transaction(trans, root);
2748                         repaired++;
2749                 }
2750
2751                 if (!delete && (backref->found_inode_ref &&
2752                                 backref->found_dir_index &&
2753                                 backref->found_dir_item &&
2754                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2755                                 !rec->found_inode_item)) {
2756                         ret = create_inode_item(root, rec, 0);
2757                         if (ret)
2758                                 break;
2759                         repaired++;
2760                 }
2761
2762         }
2763         return ret ? ret : repaired;
2764 }
2765
2766 /*
2767  * To determine the file type for nlink/inode_item repair
2768  *
2769  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2770  * Return -ENOENT if file type is not found.
2771  */
2772 static int find_file_type(struct inode_record *rec, u8 *type)
2773 {
2774         struct inode_backref *backref;
2775
2776         /* For inode item recovered case */
2777         if (rec->found_inode_item) {
2778                 *type = imode_to_type(rec->imode);
2779                 return 0;
2780         }
2781
2782         list_for_each_entry(backref, &rec->backrefs, list) {
2783                 if (backref->found_dir_index || backref->found_dir_item) {
2784                         *type = backref->filetype;
2785                         return 0;
2786                 }
2787         }
2788         return -ENOENT;
2789 }
2790
2791 /*
2792  * To determine the file name for nlink repair
2793  *
2794  * Return 0 if file name is found, set name and namelen.
2795  * Return -ENOENT if file name is not found.
2796  */
2797 static int find_file_name(struct inode_record *rec,
2798                           char *name, int *namelen)
2799 {
2800         struct inode_backref *backref;
2801
2802         list_for_each_entry(backref, &rec->backrefs, list) {
2803                 if (backref->found_dir_index || backref->found_dir_item ||
2804                     backref->found_inode_ref) {
2805                         memcpy(name, backref->name, backref->namelen);
2806                         *namelen = backref->namelen;
2807                         return 0;
2808                 }
2809         }
2810         return -ENOENT;
2811 }
2812
2813 /* Reset the nlink of the inode to the correct one */
2814 static int reset_nlink(struct btrfs_trans_handle *trans,
2815                        struct btrfs_root *root,
2816                        struct btrfs_path *path,
2817                        struct inode_record *rec)
2818 {
2819         struct inode_backref *backref;
2820         struct inode_backref *tmp;
2821         struct btrfs_key key;
2822         struct btrfs_inode_item *inode_item;
2823         int ret = 0;
2824
2825         /* We don't believe this either, reset it and iterate backref */
2826         rec->found_link = 0;
2827
2828         /* Remove all backref including the valid ones */
2829         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2830                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2831                                    backref->index, backref->name,
2832                                    backref->namelen, 0);
2833                 if (ret < 0)
2834                         goto out;
2835
2836                 /* remove invalid backref, so it won't be added back */
2837                 if (!(backref->found_dir_index &&
2838                       backref->found_dir_item &&
2839                       backref->found_inode_ref)) {
2840                         list_del(&backref->list);
2841                         free(backref);
2842                 } else {
2843                         rec->found_link++;
2844                 }
2845         }
2846
2847         /* Set nlink to 0 */
2848         key.objectid = rec->ino;
2849         key.type = BTRFS_INODE_ITEM_KEY;
2850         key.offset = 0;
2851         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2852         if (ret < 0)
2853                 goto out;
2854         if (ret > 0) {
2855                 ret = -ENOENT;
2856                 goto out;
2857         }
2858         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2859                                     struct btrfs_inode_item);
2860         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2861         btrfs_mark_buffer_dirty(path->nodes[0]);
2862         btrfs_release_path(path);
2863
2864         /*
2865          * Add back valid inode_ref/dir_item/dir_index,
2866          * add_link() will handle the nlink inc, so new nlink must be correct
2867          */
2868         list_for_each_entry(backref, &rec->backrefs, list) {
2869                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2870                                      backref->name, backref->namelen,
2871                                      backref->filetype, &backref->index, 1);
2872                 if (ret < 0)
2873                         goto out;
2874         }
2875 out:
2876         btrfs_release_path(path);
2877         return ret;
2878 }
2879
2880 static int get_highest_inode(struct btrfs_trans_handle *trans,
2881                                 struct btrfs_root *root,
2882                                 struct btrfs_path *path,
2883                                 u64 *highest_ino)
2884 {
2885         struct btrfs_key key, found_key;
2886         int ret;
2887
2888         btrfs_init_path(path);
2889         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2890         key.offset = -1;
2891         key.type = BTRFS_INODE_ITEM_KEY;
2892         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2893         if (ret == 1) {
2894                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2895                                 path->slots[0] - 1);
2896                 *highest_ino = found_key.objectid;
2897                 ret = 0;
2898         }
2899         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2900                 ret = -EOVERFLOW;
2901         btrfs_release_path(path);
2902         return ret;
2903 }
2904
2905 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2906                                struct btrfs_root *root,
2907                                struct btrfs_path *path,
2908                                struct inode_record *rec)
2909 {
2910         char *dir_name = "lost+found";
2911         char namebuf[BTRFS_NAME_LEN] = {0};
2912         u64 lost_found_ino;
2913         u32 mode = 0700;
2914         u8 type = 0;
2915         int namelen = 0;
2916         int name_recovered = 0;
2917         int type_recovered = 0;
2918         int ret = 0;
2919
2920         /*
2921          * Get file name and type first before these invalid inode ref
2922          * are deleted by remove_all_invalid_backref()
2923          */
2924         name_recovered = !find_file_name(rec, namebuf, &namelen);
2925         type_recovered = !find_file_type(rec, &type);
2926
2927         if (!name_recovered) {
2928                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2929                        rec->ino, rec->ino);
2930                 namelen = count_digits(rec->ino);
2931                 sprintf(namebuf, "%llu", rec->ino);
2932                 name_recovered = 1;
2933         }
2934         if (!type_recovered) {
2935                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2936                        rec->ino);
2937                 type = BTRFS_FT_REG_FILE;
2938                 type_recovered = 1;
2939         }
2940
2941         ret = reset_nlink(trans, root, path, rec);
2942         if (ret < 0) {
2943                 fprintf(stderr,
2944                         "Failed to reset nlink for inode %llu: %s\n",
2945                         rec->ino, strerror(-ret));
2946                 goto out;
2947         }
2948
2949         if (rec->found_link == 0) {
2950                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2951                 if (ret < 0)
2952                         goto out;
2953                 lost_found_ino++;
2954                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2955                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2956                                   mode);
2957                 if (ret < 0) {
2958                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2959                                 dir_name, strerror(-ret));
2960                         goto out;
2961                 }
2962                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2963                                      namebuf, namelen, type, NULL, 1);
2964                 /*
2965                  * Add ".INO" suffix several times to handle case where
2966                  * "FILENAME.INO" is already taken by another file.
2967                  */
2968                 while (ret == -EEXIST) {
2969                         /*
2970                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2971                          */
2972                         if (namelen + count_digits(rec->ino) + 1 >
2973                             BTRFS_NAME_LEN) {
2974                                 ret = -EFBIG;
2975                                 goto out;
2976                         }
2977                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2978                                  ".%llu", rec->ino);
2979                         namelen += count_digits(rec->ino) + 1;
2980                         ret = btrfs_add_link(trans, root, rec->ino,
2981                                              lost_found_ino, namebuf,
2982                                              namelen, type, NULL, 1);
2983                 }
2984                 if (ret < 0) {
2985                         fprintf(stderr,
2986                                 "Failed to link the inode %llu to %s dir: %s\n",
2987                                 rec->ino, dir_name, strerror(-ret));
2988                         goto out;
2989                 }
2990                 /*
2991                  * Just increase the found_link, don't actually add the
2992                  * backref. This will make things easier and this inode
2993                  * record will be freed after the repair is done.
2994                  * So fsck will not report problem about this inode.
2995                  */
2996                 rec->found_link++;
2997                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2998                        namelen, namebuf, dir_name);
2999         }
3000         printf("Fixed the nlink of inode %llu\n", rec->ino);
3001 out:
3002         /*
3003          * Clear the flag anyway, or we will loop forever for the same inode
3004          * as it will not be removed from the bad inode list and the dead loop
3005          * happens.
3006          */
3007         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3008         btrfs_release_path(path);
3009         return ret;
3010 }
3011
3012 /*
3013  * Check if there is any normal(reg or prealloc) file extent for given
3014  * ino.
3015  * This is used to determine the file type when neither its dir_index/item or
3016  * inode_item exists.
3017  *
3018  * This will *NOT* report error, if any error happens, just consider it does
3019  * not have any normal file extent.
3020  */
3021 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3022 {
3023         struct btrfs_path path;
3024         struct btrfs_key key;
3025         struct btrfs_key found_key;
3026         struct btrfs_file_extent_item *fi;
3027         u8 type;
3028         int ret = 0;
3029
3030         btrfs_init_path(&path);
3031         key.objectid = ino;
3032         key.type = BTRFS_EXTENT_DATA_KEY;
3033         key.offset = 0;
3034
3035         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3036         if (ret < 0) {
3037                 ret = 0;
3038                 goto out;
3039         }
3040         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3041                 ret = btrfs_next_leaf(root, &path);
3042                 if (ret) {
3043                         ret = 0;
3044                         goto out;
3045                 }
3046         }
3047         while (1) {
3048                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3049                                       path.slots[0]);
3050                 if (found_key.objectid != ino ||
3051                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3052                         break;
3053                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3054                                     struct btrfs_file_extent_item);
3055                 type = btrfs_file_extent_type(path.nodes[0], fi);
3056                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3057                         ret = 1;
3058                         goto out;
3059                 }
3060         }
3061 out:
3062         btrfs_release_path(&path);
3063         return ret;
3064 }
3065
3066 static u32 btrfs_type_to_imode(u8 type)
3067 {
3068         static u32 imode_by_btrfs_type[] = {
3069                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3070                 [BTRFS_FT_DIR]          = S_IFDIR,
3071                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3072                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3073                 [BTRFS_FT_FIFO]         = S_IFIFO,
3074                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3075                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3076         };
3077
3078         return imode_by_btrfs_type[(type)];
3079 }
3080
3081 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3082                                 struct btrfs_root *root,
3083                                 struct btrfs_path *path,
3084                                 struct inode_record *rec)
3085 {
3086         u8 filetype;
3087         u32 mode = 0700;
3088         int type_recovered = 0;
3089         int ret = 0;
3090
3091         printf("Trying to rebuild inode:%llu\n", rec->ino);
3092
3093         type_recovered = !find_file_type(rec, &filetype);
3094
3095         /*
3096          * Try to determine inode type if type not found.
3097          *
3098          * For found regular file extent, it must be FILE.
3099          * For found dir_item/index, it must be DIR.
3100          *
3101          * For undetermined one, use FILE as fallback.
3102          *
3103          * TODO:
3104          * 1. If found backref(inode_index/item is already handled) to it,
3105          *    it must be DIR.
3106          *    Need new inode-inode ref structure to allow search for that.
3107          */
3108         if (!type_recovered) {
3109                 if (rec->found_file_extent &&
3110                     find_normal_file_extent(root, rec->ino)) {
3111                         type_recovered = 1;
3112                         filetype = BTRFS_FT_REG_FILE;
3113                 } else if (rec->found_dir_item) {
3114                         type_recovered = 1;
3115                         filetype = BTRFS_FT_DIR;
3116                 } else if (!list_empty(&rec->orphan_extents)) {
3117                         type_recovered = 1;
3118                         filetype = BTRFS_FT_REG_FILE;
3119                 } else{
3120                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3121                                rec->ino);
3122                         type_recovered = 1;
3123                         filetype = BTRFS_FT_REG_FILE;
3124                 }
3125         }
3126
3127         ret = btrfs_new_inode(trans, root, rec->ino,
3128                               mode | btrfs_type_to_imode(filetype));
3129         if (ret < 0)
3130                 goto out;
3131
3132         /*
3133          * Here inode rebuild is done, we only rebuild the inode item,
3134          * don't repair the nlink(like move to lost+found).
3135          * That is the job of nlink repair.
3136          *
3137          * We just fill the record and return
3138          */
3139         rec->found_dir_item = 1;
3140         rec->imode = mode | btrfs_type_to_imode(filetype);
3141         rec->nlink = 0;
3142         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3143         /* Ensure the inode_nlinks repair function will be called */
3144         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3145 out:
3146         return ret;
3147 }
3148
3149 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3150                                       struct btrfs_root *root,
3151                                       struct btrfs_path *path,
3152                                       struct inode_record *rec)
3153 {
3154         struct orphan_data_extent *orphan;
3155         struct orphan_data_extent *tmp;
3156         int ret = 0;
3157
3158         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3159                 /*
3160                  * Check for conflicting file extents
3161                  *
3162                  * Here we don't know whether the extents is compressed or not,
3163                  * so we can only assume it not compressed nor data offset,
3164                  * and use its disk_len as extent length.
3165                  */
3166                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3167                                        orphan->offset, orphan->disk_len, 0);
3168                 btrfs_release_path(path);
3169                 if (ret < 0)
3170                         goto out;
3171                 if (!ret) {
3172                         fprintf(stderr,
3173                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3174                                 orphan->disk_bytenr, orphan->disk_len);
3175                         ret = btrfs_free_extent(trans,
3176                                         root->fs_info->extent_root,
3177                                         orphan->disk_bytenr, orphan->disk_len,
3178                                         0, root->objectid, orphan->objectid,
3179                                         orphan->offset);
3180                         if (ret < 0)
3181                                 goto out;
3182                 }
3183                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3184                                 orphan->offset, orphan->disk_bytenr,
3185                                 orphan->disk_len, orphan->disk_len);
3186                 if (ret < 0)
3187                         goto out;
3188
3189                 /* Update file size info */
3190                 rec->found_size += orphan->disk_len;
3191                 if (rec->found_size == rec->nbytes)
3192                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3193
3194                 /* Update the file extent hole info too */
3195                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3196                                            orphan->disk_len);
3197                 if (ret < 0)
3198                         goto out;
3199                 if (RB_EMPTY_ROOT(&rec->holes))
3200                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3201
3202                 list_del(&orphan->list);
3203                 free(orphan);
3204         }
3205         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3206 out:
3207         return ret;
3208 }
3209
3210 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3211                                         struct btrfs_root *root,
3212                                         struct btrfs_path *path,
3213                                         struct inode_record *rec)
3214 {
3215         struct rb_node *node;
3216         struct file_extent_hole *hole;
3217         int found = 0;
3218         int ret = 0;
3219
3220         node = rb_first(&rec->holes);
3221
3222         while (node) {
3223                 found = 1;
3224                 hole = rb_entry(node, struct file_extent_hole, node);
3225                 ret = btrfs_punch_hole(trans, root, rec->ino,
3226                                        hole->start, hole->len);
3227                 if (ret < 0)
3228                         goto out;
3229                 ret = del_file_extent_hole(&rec->holes, hole->start,
3230                                            hole->len);
3231                 if (ret < 0)
3232                         goto out;
3233                 if (RB_EMPTY_ROOT(&rec->holes))
3234                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3235                 node = rb_first(&rec->holes);
3236         }
3237         /* special case for a file losing all its file extent */
3238         if (!found) {
3239                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3240                                        round_up(rec->isize,
3241                                                 root->fs_info->sectorsize));
3242                 if (ret < 0)
3243                         goto out;
3244         }
3245         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3246                rec->ino, root->objectid);
3247 out:
3248         return ret;
3249 }
3250
3251 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3252 {
3253         struct btrfs_trans_handle *trans;
3254         struct btrfs_path path;
3255         int ret = 0;
3256
3257         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3258                              I_ERR_NO_ORPHAN_ITEM |
3259                              I_ERR_LINK_COUNT_WRONG |
3260                              I_ERR_NO_INODE_ITEM |
3261                              I_ERR_FILE_EXTENT_ORPHAN |
3262                              I_ERR_FILE_EXTENT_DISCOUNT|
3263                              I_ERR_FILE_NBYTES_WRONG)))
3264                 return rec->errors;
3265
3266         /*
3267          * For nlink repair, it may create a dir and add link, so
3268          * 2 for parent(256)'s dir_index and dir_item
3269          * 2 for lost+found dir's inode_item and inode_ref
3270          * 1 for the new inode_ref of the file
3271          * 2 for lost+found dir's dir_index and dir_item for the file
3272          */
3273         trans = btrfs_start_transaction(root, 7);
3274         if (IS_ERR(trans))
3275                 return PTR_ERR(trans);
3276
3277         btrfs_init_path(&path);
3278         if (rec->errors & I_ERR_NO_INODE_ITEM)
3279                 ret = repair_inode_no_item(trans, root, &path, rec);
3280         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3281                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3282         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3283                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3284         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3285                 ret = repair_inode_isize(trans, root, &path, rec);
3286         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3287                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3288         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3289                 ret = repair_inode_nlinks(trans, root, &path, rec);
3290         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3291                 ret = repair_inode_nbytes(trans, root, &path, rec);
3292         btrfs_commit_transaction(trans, root);
3293         btrfs_release_path(&path);
3294         return ret;
3295 }
3296
3297 static int check_inode_recs(struct btrfs_root *root,
3298                             struct cache_tree *inode_cache)
3299 {
3300         struct cache_extent *cache;
3301         struct ptr_node *node;
3302         struct inode_record *rec;
3303         struct inode_backref *backref;
3304         int stage = 0;
3305         int ret = 0;
3306         int err = 0;
3307         u64 error = 0;
3308         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3309
3310         if (btrfs_root_refs(&root->root_item) == 0) {
3311                 if (!cache_tree_empty(inode_cache))
3312                         fprintf(stderr, "warning line %d\n", __LINE__);
3313                 return 0;
3314         }
3315
3316         /*
3317          * We need to repair backrefs first because we could change some of the
3318          * errors in the inode recs.
3319          *
3320          * We also need to go through and delete invalid backrefs first and then
3321          * add the correct ones second.  We do this because we may get EEXIST
3322          * when adding back the correct index because we hadn't yet deleted the
3323          * invalid index.
3324          *
3325          * For example, if we were missing a dir index then the directories
3326          * isize would be wrong, so if we fixed the isize to what we thought it
3327          * would be and then fixed the backref we'd still have a invalid fs, so
3328          * we need to add back the dir index and then check to see if the isize
3329          * is still wrong.
3330          */
3331         while (stage < 3) {
3332                 stage++;
3333                 if (stage == 3 && !err)
3334                         break;
3335
3336                 cache = search_cache_extent(inode_cache, 0);
3337                 while (repair && cache) {
3338                         node = container_of(cache, struct ptr_node, cache);
3339                         rec = node->data;
3340                         cache = next_cache_extent(cache);
3341
3342                         /* Need to free everything up and rescan */
3343                         if (stage == 3) {
3344                                 remove_cache_extent(inode_cache, &node->cache);
3345                                 free(node);
3346                                 free_inode_rec(rec);
3347                                 continue;
3348                         }
3349
3350                         if (list_empty(&rec->backrefs))
3351                                 continue;
3352
3353                         ret = repair_inode_backrefs(root, rec, inode_cache,
3354                                                     stage == 1);
3355                         if (ret < 0) {
3356                                 err = ret;
3357                                 stage = 2;
3358                                 break;
3359                         } if (ret > 0) {
3360                                 err = -EAGAIN;
3361                         }
3362                 }
3363         }
3364         if (err)
3365                 return err;
3366
3367         rec = get_inode_rec(inode_cache, root_dirid, 0);
3368         BUG_ON(IS_ERR(rec));
3369         if (rec) {
3370                 ret = check_root_dir(rec);
3371                 if (ret) {
3372                         fprintf(stderr, "root %llu root dir %llu error\n",
3373                                 (unsigned long long)root->root_key.objectid,
3374                                 (unsigned long long)root_dirid);
3375                         print_inode_error(root, rec);
3376                         error++;
3377                 }
3378         } else {
3379                 if (repair) {
3380                         struct btrfs_trans_handle *trans;
3381
3382                         trans = btrfs_start_transaction(root, 1);
3383                         if (IS_ERR(trans)) {
3384                                 err = PTR_ERR(trans);
3385                                 return err;
3386                         }
3387
3388                         fprintf(stderr,
3389                                 "root %llu missing its root dir, recreating\n",
3390                                 (unsigned long long)root->objectid);
3391
3392                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3393                         BUG_ON(ret);
3394
3395                         btrfs_commit_transaction(trans, root);
3396                         return -EAGAIN;
3397                 }
3398
3399                 fprintf(stderr, "root %llu root dir %llu not found\n",
3400                         (unsigned long long)root->root_key.objectid,
3401                         (unsigned long long)root_dirid);
3402         }
3403
3404         while (1) {
3405                 cache = search_cache_extent(inode_cache, 0);
3406                 if (!cache)
3407                         break;
3408                 node = container_of(cache, struct ptr_node, cache);
3409                 rec = node->data;
3410                 remove_cache_extent(inode_cache, &node->cache);
3411                 free(node);
3412                 if (rec->ino == root_dirid ||
3413                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3414                         free_inode_rec(rec);
3415                         continue;
3416                 }
3417
3418                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3419                         ret = check_orphan_item(root, rec->ino);
3420                         if (ret == 0)
3421                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3422                         if (can_free_inode_rec(rec)) {
3423                                 free_inode_rec(rec);
3424                                 continue;
3425                         }
3426                 }
3427
3428                 if (!rec->found_inode_item)
3429                         rec->errors |= I_ERR_NO_INODE_ITEM;
3430                 if (rec->found_link != rec->nlink)
3431                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3432                 if (repair) {
3433                         ret = try_repair_inode(root, rec);
3434                         if (ret == 0 && can_free_inode_rec(rec)) {
3435                                 free_inode_rec(rec);
3436                                 continue;
3437                         }
3438                         ret = 0;
3439                 }
3440
3441                 if (!(repair && ret == 0))
3442                         error++;
3443                 print_inode_error(root, rec);
3444                 list_for_each_entry(backref, &rec->backrefs, list) {
3445                         if (!backref->found_dir_item)
3446                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3447                         if (!backref->found_dir_index)
3448                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3449                         if (!backref->found_inode_ref)
3450                                 backref->errors |= REF_ERR_NO_INODE_REF;
3451                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3452                                 " namelen %u name %s filetype %d errors %x",
3453                                 (unsigned long long)backref->dir,
3454                                 (unsigned long long)backref->index,
3455                                 backref->namelen, backref->name,
3456                                 backref->filetype, backref->errors);
3457                         print_ref_error(backref->errors);
3458                 }
3459                 free_inode_rec(rec);
3460         }
3461         return (error > 0) ? -1 : 0;
3462 }
3463
3464 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3465                                         u64 objectid)
3466 {
3467         struct cache_extent *cache;
3468         struct root_record *rec = NULL;
3469         int ret;
3470
3471         cache = lookup_cache_extent(root_cache, objectid, 1);
3472         if (cache) {
3473                 rec = container_of(cache, struct root_record, cache);
3474         } else {
3475                 rec = calloc(1, sizeof(*rec));
3476                 if (!rec)
3477                         return ERR_PTR(-ENOMEM);
3478                 rec->objectid = objectid;
3479                 INIT_LIST_HEAD(&rec->backrefs);
3480                 rec->cache.start = objectid;
3481                 rec->cache.size = 1;
3482
3483                 ret = insert_cache_extent(root_cache, &rec->cache);
3484                 if (ret)
3485                         return ERR_PTR(-EEXIST);
3486         }
3487         return rec;
3488 }
3489
3490 static struct root_backref *get_root_backref(struct root_record *rec,
3491                                              u64 ref_root, u64 dir, u64 index,
3492                                              const char *name, int namelen)
3493 {
3494         struct root_backref *backref;
3495
3496         list_for_each_entry(backref, &rec->backrefs, list) {
3497                 if (backref->ref_root != ref_root || backref->dir != dir ||
3498                     backref->namelen != namelen)
3499                         continue;
3500                 if (memcmp(name, backref->name, namelen))
3501                         continue;
3502                 return backref;
3503         }
3504
3505         backref = calloc(1, sizeof(*backref) + namelen + 1);
3506         if (!backref)
3507                 return NULL;
3508         backref->ref_root = ref_root;
3509         backref->dir = dir;
3510         backref->index = index;
3511         backref->namelen = namelen;
3512         memcpy(backref->name, name, namelen);
3513         backref->name[namelen] = '\0';
3514         list_add_tail(&backref->list, &rec->backrefs);
3515         return backref;
3516 }
3517
3518 static void free_root_record(struct cache_extent *cache)
3519 {
3520         struct root_record *rec;
3521         struct root_backref *backref;
3522
3523         rec = container_of(cache, struct root_record, cache);
3524         while (!list_empty(&rec->backrefs)) {
3525                 backref = to_root_backref(rec->backrefs.next);
3526                 list_del(&backref->list);
3527                 free(backref);
3528         }
3529
3530         free(rec);
3531 }
3532
3533 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3534
3535 static int add_root_backref(struct cache_tree *root_cache,
3536                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3537                             const char *name, int namelen,
3538                             int item_type, int errors)
3539 {
3540         struct root_record *rec;
3541         struct root_backref *backref;
3542
3543         rec = get_root_rec(root_cache, root_id);
3544         BUG_ON(IS_ERR(rec));
3545         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3546         BUG_ON(!backref);
3547
3548         backref->errors |= errors;
3549
3550         if (item_type != BTRFS_DIR_ITEM_KEY) {
3551                 if (backref->found_dir_index || backref->found_back_ref ||
3552                     backref->found_forward_ref) {
3553                         if (backref->index != index)
3554                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3555                 } else {
3556                         backref->index = index;
3557                 }
3558         }
3559
3560         if (item_type == BTRFS_DIR_ITEM_KEY) {
3561                 if (backref->found_forward_ref)
3562                         rec->found_ref++;
3563                 backref->found_dir_item = 1;
3564         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3565                 backref->found_dir_index = 1;
3566         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3567                 if (backref->found_forward_ref)
3568                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3569                 else if (backref->found_dir_item)
3570                         rec->found_ref++;
3571                 backref->found_forward_ref = 1;
3572         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3573                 if (backref->found_back_ref)
3574                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3575                 backref->found_back_ref = 1;
3576         } else {
3577                 BUG_ON(1);
3578         }
3579
3580         if (backref->found_forward_ref && backref->found_dir_item)
3581                 backref->reachable = 1;
3582         return 0;
3583 }
3584
3585 static int merge_root_recs(struct btrfs_root *root,
3586                            struct cache_tree *src_cache,
3587                            struct cache_tree *dst_cache)
3588 {
3589         struct cache_extent *cache;
3590         struct ptr_node *node;
3591         struct inode_record *rec;
3592         struct inode_backref *backref;
3593         int ret = 0;
3594
3595         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3596                 free_inode_recs_tree(src_cache);
3597                 return 0;
3598         }
3599
3600         while (1) {
3601                 cache = search_cache_extent(src_cache, 0);
3602                 if (!cache)
3603                         break;
3604                 node = container_of(cache, struct ptr_node, cache);
3605                 rec = node->data;
3606                 remove_cache_extent(src_cache, &node->cache);
3607                 free(node);
3608
3609                 ret = is_child_root(root, root->objectid, rec->ino);
3610                 if (ret < 0)
3611                         break;
3612                 else if (ret == 0)
3613                         goto skip;
3614
3615                 list_for_each_entry(backref, &rec->backrefs, list) {
3616                         BUG_ON(backref->found_inode_ref);
3617                         if (backref->found_dir_item)
3618                                 add_root_backref(dst_cache, rec->ino,
3619                                         root->root_key.objectid, backref->dir,
3620                                         backref->index, backref->name,
3621                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3622                                         backref->errors);
3623                         if (backref->found_dir_index)
3624                                 add_root_backref(dst_cache, rec->ino,
3625                                         root->root_key.objectid, backref->dir,
3626                                         backref->index, backref->name,
3627                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3628                                         backref->errors);
3629                 }
3630 skip:
3631                 free_inode_rec(rec);
3632         }
3633         if (ret < 0)
3634                 return ret;
3635         return 0;
3636 }
3637
3638 static int check_root_refs(struct btrfs_root *root,
3639                            struct cache_tree *root_cache)
3640 {
3641         struct root_record *rec;
3642         struct root_record *ref_root;
3643         struct root_backref *backref;
3644         struct cache_extent *cache;
3645         int loop = 1;
3646         int ret;
3647         int error;
3648         int errors = 0;
3649
3650         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3651         BUG_ON(IS_ERR(rec));
3652         rec->found_ref = 1;
3653
3654         /* fixme: this can not detect circular references */
3655         while (loop) {
3656                 loop = 0;
3657                 cache = search_cache_extent(root_cache, 0);
3658                 while (1) {
3659                         if (!cache)
3660                                 break;
3661                         rec = container_of(cache, struct root_record, cache);
3662                         cache = next_cache_extent(cache);
3663
3664                         if (rec->found_ref == 0)
3665                                 continue;
3666
3667                         list_for_each_entry(backref, &rec->backrefs, list) {
3668                                 if (!backref->reachable)
3669                                         continue;
3670
3671                                 ref_root = get_root_rec(root_cache,
3672                                                         backref->ref_root);
3673                                 BUG_ON(IS_ERR(ref_root));
3674                                 if (ref_root->found_ref > 0)
3675                                         continue;
3676
3677                                 backref->reachable = 0;
3678                                 rec->found_ref--;
3679                                 if (rec->found_ref == 0)
3680                                         loop = 1;
3681                         }
3682                 }
3683         }
3684
3685         cache = search_cache_extent(root_cache, 0);
3686         while (1) {
3687                 if (!cache)
3688                         break;
3689                 rec = container_of(cache, struct root_record, cache);
3690                 cache = next_cache_extent(cache);
3691
3692                 if (rec->found_ref == 0 &&
3693                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3694                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3695                         ret = check_orphan_item(root->fs_info->tree_root,
3696                                                 rec->objectid);
3697                         if (ret == 0)
3698                                 continue;
3699
3700                         /*
3701                          * If we don't have a root item then we likely just have
3702                          * a dir item in a snapshot for this root but no actual
3703                          * ref key or anything so it's meaningless.
3704                          */
3705                         if (!rec->found_root_item)
3706                                 continue;
3707                         errors++;
3708                         fprintf(stderr, "fs tree %llu not referenced\n",
3709                                 (unsigned long long)rec->objectid);
3710                 }
3711
3712                 error = 0;
3713                 if (rec->found_ref > 0 && !rec->found_root_item)
3714                         error = 1;
3715                 list_for_each_entry(backref, &rec->backrefs, list) {
3716                         if (!backref->found_dir_item)
3717                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3718                         if (!backref->found_dir_index)
3719                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3720                         if (!backref->found_back_ref)
3721                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3722                         if (!backref->found_forward_ref)
3723                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3724                         if (backref->reachable && backref->errors)
3725                                 error = 1;
3726                 }
3727                 if (!error)
3728                         continue;
3729
3730                 errors++;
3731                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3732                         (unsigned long long)rec->objectid, rec->found_ref,
3733                          rec->found_root_item ? "" : "not found");
3734
3735                 list_for_each_entry(backref, &rec->backrefs, list) {
3736                         if (!backref->reachable)
3737                                 continue;
3738                         if (!backref->errors && rec->found_root_item)
3739                                 continue;
3740                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3741                                 " index %llu namelen %u name %s errors %x\n",
3742                                 (unsigned long long)backref->ref_root,
3743                                 (unsigned long long)backref->dir,
3744                                 (unsigned long long)backref->index,
3745                                 backref->namelen, backref->name,
3746                                 backref->errors);
3747                         print_ref_error(backref->errors);
3748                 }
3749         }
3750         return errors > 0 ? 1 : 0;
3751 }
3752
3753 static int process_root_ref(struct extent_buffer *eb, int slot,
3754                             struct btrfs_key *key,
3755                             struct cache_tree *root_cache)
3756 {
3757         u64 dirid;
3758         u64 index;
3759         u32 len;
3760         u32 name_len;
3761         struct btrfs_root_ref *ref;
3762         char namebuf[BTRFS_NAME_LEN];
3763         int error;
3764
3765         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3766
3767         dirid = btrfs_root_ref_dirid(eb, ref);
3768         index = btrfs_root_ref_sequence(eb, ref);
3769         name_len = btrfs_root_ref_name_len(eb, ref);
3770
3771         if (name_len <= BTRFS_NAME_LEN) {
3772                 len = name_len;
3773                 error = 0;
3774         } else {
3775                 len = BTRFS_NAME_LEN;
3776                 error = REF_ERR_NAME_TOO_LONG;
3777         }
3778         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3779
3780         if (key->type == BTRFS_ROOT_REF_KEY) {
3781                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3782                                  index, namebuf, len, key->type, error);
3783         } else {
3784                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3785                                  index, namebuf, len, key->type, error);
3786         }
3787         return 0;
3788 }
3789
3790 static void free_corrupt_block(struct cache_extent *cache)
3791 {
3792         struct btrfs_corrupt_block *corrupt;
3793
3794         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3795         free(corrupt);
3796 }
3797
3798 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3799
3800 /*
3801  * Repair the btree of the given root.
3802  *
3803  * The fix is to remove the node key in corrupt_blocks cache_tree.
3804  * and rebalance the tree.
3805  * After the fix, the btree should be writeable.
3806  */
3807 static int repair_btree(struct btrfs_root *root,
3808                         struct cache_tree *corrupt_blocks)
3809 {
3810         struct btrfs_trans_handle *trans;
3811         struct btrfs_path path;
3812         struct btrfs_corrupt_block *corrupt;
3813         struct cache_extent *cache;
3814         struct btrfs_key key;
3815         u64 offset;
3816         int level;
3817         int ret = 0;
3818
3819         if (cache_tree_empty(corrupt_blocks))
3820                 return 0;
3821
3822         trans = btrfs_start_transaction(root, 1);
3823         if (IS_ERR(trans)) {
3824                 ret = PTR_ERR(trans);
3825                 fprintf(stderr, "Error starting transaction: %s\n",
3826                         strerror(-ret));
3827                 return ret;
3828         }
3829         btrfs_init_path(&path);
3830         cache = first_cache_extent(corrupt_blocks);
3831         while (cache) {
3832                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3833                                        cache);
3834                 level = corrupt->level;
3835                 path.lowest_level = level;
3836                 key.objectid = corrupt->key.objectid;
3837                 key.type = corrupt->key.type;
3838                 key.offset = corrupt->key.offset;
3839
3840                 /*
3841                  * Here we don't want to do any tree balance, since it may
3842                  * cause a balance with corrupted brother leaf/node,
3843                  * so ins_len set to 0 here.
3844                  * Balance will be done after all corrupt node/leaf is deleted.
3845                  */
3846                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3847                 if (ret < 0)
3848                         goto out;
3849                 offset = btrfs_node_blockptr(path.nodes[level],
3850                                              path.slots[level]);
3851
3852                 /* Remove the ptr */
3853                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3854                 if (ret < 0)
3855                         goto out;
3856                 /*
3857                  * Remove the corresponding extent
3858                  * return value is not concerned.
3859                  */
3860                 btrfs_release_path(&path);
3861                 ret = btrfs_free_extent(trans, root, offset,
3862                                 root->fs_info->nodesize, 0,
3863                                 root->root_key.objectid, level - 1, 0);
3864                 cache = next_cache_extent(cache);
3865         }
3866
3867         /* Balance the btree using btrfs_search_slot() */
3868         cache = first_cache_extent(corrupt_blocks);
3869         while (cache) {
3870                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3871                                        cache);
3872                 memcpy(&key, &corrupt->key, sizeof(key));
3873                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3874                 if (ret < 0)
3875                         goto out;
3876                 /* return will always >0 since it won't find the item */
3877                 ret = 0;
3878                 btrfs_release_path(&path);
3879                 cache = next_cache_extent(cache);
3880         }
3881 out:
3882         btrfs_commit_transaction(trans, root);
3883         btrfs_release_path(&path);
3884         return ret;
3885 }
3886
3887 static int check_fs_root(struct btrfs_root *root,
3888                          struct cache_tree *root_cache,
3889                          struct walk_control *wc)
3890 {
3891         int ret = 0;
3892         int err = 0;
3893         int wret;
3894         int level;
3895         struct btrfs_path path;
3896         struct shared_node root_node;
3897         struct root_record *rec;
3898         struct btrfs_root_item *root_item = &root->root_item;
3899         struct cache_tree corrupt_blocks;
3900         struct orphan_data_extent *orphan;
3901         struct orphan_data_extent *tmp;
3902         enum btrfs_tree_block_status status;
3903         struct node_refs nrefs;
3904
3905         /*
3906          * Reuse the corrupt_block cache tree to record corrupted tree block
3907          *
3908          * Unlike the usage in extent tree check, here we do it in a per
3909          * fs/subvol tree base.
3910          */
3911         cache_tree_init(&corrupt_blocks);
3912         root->fs_info->corrupt_blocks = &corrupt_blocks;
3913
3914         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3915                 rec = get_root_rec(root_cache, root->root_key.objectid);
3916                 BUG_ON(IS_ERR(rec));
3917                 if (btrfs_root_refs(root_item) > 0)
3918                         rec->found_root_item = 1;
3919         }
3920
3921         btrfs_init_path(&path);
3922         memset(&root_node, 0, sizeof(root_node));
3923         cache_tree_init(&root_node.root_cache);
3924         cache_tree_init(&root_node.inode_cache);
3925         memset(&nrefs, 0, sizeof(nrefs));
3926
3927         /* Move the orphan extent record to corresponding inode_record */
3928         list_for_each_entry_safe(orphan, tmp,
3929                                  &root->orphan_data_extents, list) {
3930                 struct inode_record *inode;
3931
3932                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3933                                       1);
3934                 BUG_ON(IS_ERR(inode));
3935                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3936                 list_move(&orphan->list, &inode->orphan_extents);
3937         }
3938
3939         level = btrfs_header_level(root->node);
3940         memset(wc->nodes, 0, sizeof(wc->nodes));
3941         wc->nodes[level] = &root_node;
3942         wc->active_node = level;
3943         wc->root_level = level;
3944
3945         /* We may not have checked the root block, lets do that now */
3946         if (btrfs_is_leaf(root->node))
3947                 status = btrfs_check_leaf(root, NULL, root->node);
3948         else
3949                 status = btrfs_check_node(root, NULL, root->node);
3950         if (status != BTRFS_TREE_BLOCK_CLEAN)
3951                 return -EIO;
3952
3953         if (btrfs_root_refs(root_item) > 0 ||
3954             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3955                 path.nodes[level] = root->node;
3956                 extent_buffer_get(root->node);
3957                 path.slots[level] = 0;
3958         } else {
3959                 struct btrfs_key key;
3960                 struct btrfs_disk_key found_key;
3961
3962                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3963                 level = root_item->drop_level;
3964                 path.lowest_level = level;
3965                 if (level > btrfs_header_level(root->node) ||
3966                     level >= BTRFS_MAX_LEVEL) {
3967                         error("ignoring invalid drop level: %u", level);
3968                         goto skip_walking;
3969                 }
3970                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3971                 if (wret < 0)
3972                         goto skip_walking;
3973                 btrfs_node_key(path.nodes[level], &found_key,
3974                                 path.slots[level]);
3975                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3976                                         sizeof(found_key)));
3977         }
3978
3979         while (1) {
3980                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3981                 if (wret < 0)
3982                         ret = wret;
3983                 if (wret != 0)
3984                         break;
3985
3986                 wret = walk_up_tree(root, &path, wc, &level);
3987                 if (wret < 0)
3988                         ret = wret;
3989                 if (wret != 0)
3990                         break;
3991         }
3992 skip_walking:
3993         btrfs_release_path(&path);
3994
3995         if (!cache_tree_empty(&corrupt_blocks)) {
3996                 struct cache_extent *cache;
3997                 struct btrfs_corrupt_block *corrupt;
3998
3999                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4000                        root->root_key.objectid);
4001                 cache = first_cache_extent(&corrupt_blocks);
4002                 while (cache) {
4003                         corrupt = container_of(cache,
4004                                                struct btrfs_corrupt_block,
4005                                                cache);
4006                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4007                                cache->start, corrupt->level,
4008                                corrupt->key.objectid, corrupt->key.type,
4009                                corrupt->key.offset);
4010                         cache = next_cache_extent(cache);
4011                 }
4012                 if (repair) {
4013                         printf("Try to repair the btree for root %llu\n",
4014                                root->root_key.objectid);
4015                         ret = repair_btree(root, &corrupt_blocks);
4016                         if (ret < 0)
4017                                 fprintf(stderr, "Failed to repair btree: %s\n",
4018                                         strerror(-ret));
4019                         if (!ret)
4020                                 printf("Btree for root %llu is fixed\n",
4021                                        root->root_key.objectid);
4022                 }
4023         }
4024
4025         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4026         if (err < 0)
4027                 ret = err;
4028
4029         if (root_node.current) {
4030                 root_node.current->checked = 1;
4031                 maybe_free_inode_rec(&root_node.inode_cache,
4032                                 root_node.current);
4033         }
4034
4035         err = check_inode_recs(root, &root_node.inode_cache);
4036         if (!ret)
4037                 ret = err;
4038
4039         free_corrupt_blocks_tree(&corrupt_blocks);
4040         root->fs_info->corrupt_blocks = NULL;
4041         free_orphan_data_extents(&root->orphan_data_extents);
4042         return ret;
4043 }
4044
4045 static int fs_root_objectid(u64 objectid)
4046 {
4047         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4048             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4049                 return 1;
4050         return is_fstree(objectid);
4051 }
4052
4053 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4054                           struct cache_tree *root_cache)
4055 {
4056         struct btrfs_path path;
4057         struct btrfs_key key;
4058         struct walk_control wc;
4059         struct extent_buffer *leaf, *tree_node;
4060         struct btrfs_root *root = fs_info->fs_root;
4061         struct btrfs_root *tmp_root;
4062         struct btrfs_root *tree_root = root->fs_info->tree_root;
4063         int ret;
4064         int err = 0;
4065
4066         if (ctx.progress_enabled) {
4067                 ctx.tp = TASK_FS_ROOTS;
4068                 task_start(ctx.info);
4069         }
4070
4071         /*
4072          * Just in case we made any changes to the extent tree that weren't
4073          * reflected into the free space cache yet.
4074          */
4075         if (repair)
4076                 reset_cached_block_groups(root->fs_info);
4077         memset(&wc, 0, sizeof(wc));
4078         cache_tree_init(&wc.shared);
4079         btrfs_init_path(&path);
4080
4081 again:
4082         key.offset = 0;
4083         key.objectid = 0;
4084         key.type = BTRFS_ROOT_ITEM_KEY;
4085         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4086         if (ret < 0) {
4087                 err = 1;
4088                 goto out;
4089         }
4090         tree_node = tree_root->node;
4091         while (1) {
4092                 if (tree_node != tree_root->node) {
4093                         free_root_recs_tree(root_cache);
4094                         btrfs_release_path(&path);
4095                         goto again;
4096                 }
4097                 leaf = path.nodes[0];
4098                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4099                         ret = btrfs_next_leaf(tree_root, &path);
4100                         if (ret) {
4101                                 if (ret < 0)
4102                                         err = 1;
4103                                 break;
4104                         }
4105                         leaf = path.nodes[0];
4106                 }
4107                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4108                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4109                     fs_root_objectid(key.objectid)) {
4110                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4111                                 tmp_root = btrfs_read_fs_root_no_cache(
4112                                                 root->fs_info, &key);
4113                         } else {
4114                                 key.offset = (u64)-1;
4115                                 tmp_root = btrfs_read_fs_root(
4116                                                 root->fs_info, &key);
4117                         }
4118                         if (IS_ERR(tmp_root)) {
4119                                 err = 1;
4120                                 goto next;
4121                         }
4122                         ret = check_fs_root(tmp_root, root_cache, &wc);
4123                         if (ret == -EAGAIN) {
4124                                 free_root_recs_tree(root_cache);
4125                                 btrfs_release_path(&path);
4126                                 goto again;
4127                         }
4128                         if (ret)
4129                                 err = 1;
4130                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4131                                 btrfs_free_fs_root(tmp_root);
4132                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4133                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4134                         process_root_ref(leaf, path.slots[0], &key,
4135                                          root_cache);
4136                 }
4137 next:
4138                 path.slots[0]++;
4139         }
4140 out:
4141         btrfs_release_path(&path);
4142         if (err)
4143                 free_extent_cache_tree(&wc.shared);
4144         if (!cache_tree_empty(&wc.shared))
4145                 fprintf(stderr, "warning line %d\n", __LINE__);
4146
4147         task_stop(ctx.info);
4148
4149         return err;
4150 }
4151
4152 /*
4153  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4154  * INODE_REF/INODE_EXTREF match.
4155  *
4156  * @root:       the root of the fs/file tree
4157  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4158  * @key:        the key of the DIR_ITEM/DIR_INDEX
4159  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4160  *              distinguish root_dir between normal dir/file
4161  * @name:       the name in the INODE_REF/INODE_EXTREF
4162  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4163  * @mode:       the st_mode of INODE_ITEM
4164  *
4165  * Return 0 if no error occurred.
4166  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4167  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4168  * dir/file.
4169  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4170  * not match for normal dir/file.
4171  */
4172 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4173                          struct btrfs_key *key, u64 index, char *name,
4174                          u32 namelen, u32 mode)
4175 {
4176         struct btrfs_path path;
4177         struct extent_buffer *node;
4178         struct btrfs_dir_item *di;
4179         struct btrfs_key location;
4180         char namebuf[BTRFS_NAME_LEN] = {0};
4181         u32 total;
4182         u32 cur = 0;
4183         u32 len;
4184         u32 name_len;
4185         u32 data_len;
4186         u8 filetype;
4187         int slot;
4188         int ret;
4189
4190         btrfs_init_path(&path);
4191         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4192         if (ret < 0) {
4193                 ret = DIR_ITEM_MISSING;
4194                 goto out;
4195         }
4196
4197         /* Process root dir and goto out*/
4198         if (index == 0) {
4199                 if (ret == 0) {
4200                         ret = ROOT_DIR_ERROR;
4201                         error(
4202                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4203                                 root->objectid,
4204                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4205                                         "REF" : "EXTREF",
4206                                 ref_key->objectid, ref_key->offset,
4207                                 key->type == BTRFS_DIR_ITEM_KEY ?
4208                                         "DIR_ITEM" : "DIR_INDEX");
4209                 } else {
4210                         ret = 0;
4211                 }
4212
4213                 goto out;
4214         }
4215
4216         /* Process normal file/dir */
4217         if (ret > 0) {
4218                 ret = DIR_ITEM_MISSING;
4219                 error(
4220                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4221                         root->objectid,
4222                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4223                         ref_key->objectid, ref_key->offset,
4224                         key->type == BTRFS_DIR_ITEM_KEY ?
4225                                 "DIR_ITEM" : "DIR_INDEX",
4226                         key->objectid, key->offset, namelen, name,
4227                         imode_to_type(mode));
4228                 goto out;
4229         }
4230
4231         /* Check whether inode_id/filetype/name match */
4232         node = path.nodes[0];
4233         slot = path.slots[0];
4234         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4235         total = btrfs_item_size_nr(node, slot);
4236         while (cur < total) {
4237                 ret = DIR_ITEM_MISMATCH;
4238                 name_len = btrfs_dir_name_len(node, di);
4239                 data_len = btrfs_dir_data_len(node, di);
4240
4241                 btrfs_dir_item_key_to_cpu(node, di, &location);
4242                 if (location.objectid != ref_key->objectid ||
4243                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4244                     location.offset != 0)
4245                         goto next;
4246
4247                 filetype = btrfs_dir_type(node, di);
4248                 if (imode_to_type(mode) != filetype)
4249                         goto next;
4250
4251                 if (cur + sizeof(*di) + name_len > total ||
4252                     name_len > BTRFS_NAME_LEN) {
4253                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4254                                 root->objectid,
4255                                 key->type == BTRFS_DIR_ITEM_KEY ?
4256                                 "DIR_ITEM" : "DIR_INDEX",
4257                                 key->objectid, key->offset, name_len);
4258
4259                         if (cur + sizeof(*di) > total)
4260                                 break;
4261                         len = min_t(u32, total - cur - sizeof(*di),
4262                                     BTRFS_NAME_LEN);
4263                 } else {
4264                         len = name_len;
4265                 }
4266
4267                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4268                 if (len != namelen || strncmp(namebuf, name, len))
4269                         goto next;
4270
4271                 ret = 0;
4272                 goto out;
4273 next:
4274                 len = sizeof(*di) + name_len + data_len;
4275                 di = (struct btrfs_dir_item *)((char *)di + len);
4276                 cur += len;
4277         }
4278         if (ret == DIR_ITEM_MISMATCH)
4279                 error(
4280                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4281                         root->objectid,
4282                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4283                         ref_key->objectid, ref_key->offset,
4284                         key->type == BTRFS_DIR_ITEM_KEY ?
4285                                 "DIR_ITEM" : "DIR_INDEX",
4286                         key->objectid, key->offset, namelen, name,
4287                         imode_to_type(mode));
4288 out:
4289         btrfs_release_path(&path);
4290         return ret;
4291 }
4292
4293 /*
4294  * Traverse the given INODE_REF and call find_dir_item() to find related
4295  * DIR_ITEM/DIR_INDEX.
4296  *
4297  * @root:       the root of the fs/file tree
4298  * @ref_key:    the key of the INODE_REF
4299  * @refs:       the count of INODE_REF
4300  * @mode:       the st_mode of INODE_ITEM
4301  *
4302  * Return 0 if no error occurred.
4303  */
4304 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4305                            struct extent_buffer *node, int slot, u64 *refs,
4306                            int mode)
4307 {
4308         struct btrfs_key key;
4309         struct btrfs_inode_ref *ref;
4310         char namebuf[BTRFS_NAME_LEN] = {0};
4311         u32 total;
4312         u32 cur = 0;
4313         u32 len;
4314         u32 name_len;
4315         u64 index;
4316         int ret, err = 0;
4317
4318         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4319         total = btrfs_item_size_nr(node, slot);
4320
4321 next:
4322         /* Update inode ref count */
4323         (*refs)++;
4324
4325         index = btrfs_inode_ref_index(node, ref);
4326         name_len = btrfs_inode_ref_name_len(node, ref);
4327         if (cur + sizeof(*ref) + name_len > total ||
4328             name_len > BTRFS_NAME_LEN) {
4329                 warning("root %llu INODE_REF[%llu %llu] name too long",
4330                         root->objectid, ref_key->objectid, ref_key->offset);
4331
4332                 if (total < cur + sizeof(*ref))
4333                         goto out;
4334                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4335         } else {
4336                 len = name_len;
4337         }
4338
4339         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4340
4341         /* Check root dir ref name */
4342         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4343                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4344                       root->objectid, ref_key->objectid, ref_key->offset,
4345                       namebuf);
4346                 err |= ROOT_DIR_ERROR;
4347         }
4348
4349         /* Find related DIR_INDEX */
4350         key.objectid = ref_key->offset;
4351         key.type = BTRFS_DIR_INDEX_KEY;
4352         key.offset = index;
4353         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4354         err |= ret;
4355
4356         /* Find related dir_item */
4357         key.objectid = ref_key->offset;
4358         key.type = BTRFS_DIR_ITEM_KEY;
4359         key.offset = btrfs_name_hash(namebuf, len);
4360         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4361         err |= ret;
4362
4363         len = sizeof(*ref) + name_len;
4364         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4365         cur += len;
4366         if (cur < total)
4367                 goto next;
4368
4369 out:
4370         return err;
4371 }
4372
4373 /*
4374  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4375  * DIR_ITEM/DIR_INDEX.
4376  *
4377  * @root:       the root of the fs/file tree
4378  * @ref_key:    the key of the INODE_EXTREF
4379  * @refs:       the count of INODE_EXTREF
4380  * @mode:       the st_mode of INODE_ITEM
4381  *
4382  * Return 0 if no error occurred.
4383  */
4384 static int check_inode_extref(struct btrfs_root *root,
4385                               struct btrfs_key *ref_key,
4386                               struct extent_buffer *node, int slot, u64 *refs,
4387                               int mode)
4388 {
4389         struct btrfs_key key;
4390         struct btrfs_inode_extref *extref;
4391         char namebuf[BTRFS_NAME_LEN] = {0};
4392         u32 total;
4393         u32 cur = 0;
4394         u32 len;
4395         u32 name_len;
4396         u64 index;
4397         u64 parent;
4398         int ret;
4399         int err = 0;
4400
4401         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4402         total = btrfs_item_size_nr(node, slot);
4403
4404 next:
4405         /* update inode ref count */
4406         (*refs)++;
4407         name_len = btrfs_inode_extref_name_len(node, extref);
4408         index = btrfs_inode_extref_index(node, extref);
4409         parent = btrfs_inode_extref_parent(node, extref);
4410         if (name_len <= BTRFS_NAME_LEN) {
4411                 len = name_len;
4412         } else {
4413                 len = BTRFS_NAME_LEN;
4414                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4415                         root->objectid, ref_key->objectid, ref_key->offset);
4416         }
4417         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4418
4419         /* Check root dir ref name */
4420         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4421                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4422                       root->objectid, ref_key->objectid, ref_key->offset,
4423                       namebuf);
4424                 err |= ROOT_DIR_ERROR;
4425         }
4426
4427         /* find related dir_index */
4428         key.objectid = parent;
4429         key.type = BTRFS_DIR_INDEX_KEY;
4430         key.offset = index;
4431         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4432         err |= ret;
4433
4434         /* find related dir_item */
4435         key.objectid = parent;
4436         key.type = BTRFS_DIR_ITEM_KEY;
4437         key.offset = btrfs_name_hash(namebuf, len);
4438         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4439         err |= ret;
4440
4441         len = sizeof(*extref) + name_len;
4442         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4443         cur += len;
4444
4445         if (cur < total)
4446                 goto next;
4447
4448         return err;
4449 }
4450
4451 /*
4452  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4453  * DIR_ITEM/DIR_INDEX match.
4454  *
4455  * @root:       the root of the fs/file tree
4456  * @key:        the key of the INODE_REF/INODE_EXTREF
4457  * @name:       the name in the INODE_REF/INODE_EXTREF
4458  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4459  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4460  * to (u64)-1
4461  * @ext_ref:    the EXTENDED_IREF feature
4462  *
4463  * Return 0 if no error occurred.
4464  * Return >0 for error bitmap
4465  */
4466 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4467                           char *name, int namelen, u64 index,
4468                           unsigned int ext_ref)
4469 {
4470         struct btrfs_path path;
4471         struct btrfs_inode_ref *ref;
4472         struct btrfs_inode_extref *extref;
4473         struct extent_buffer *node;
4474         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4475         u32 total;
4476         u32 cur = 0;
4477         u32 len;
4478         u32 ref_namelen;
4479         u64 ref_index;
4480         u64 parent;
4481         u64 dir_id;
4482         int slot;
4483         int ret;
4484
4485         btrfs_init_path(&path);
4486         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4487         if (ret) {
4488                 ret = INODE_REF_MISSING;
4489                 goto extref;
4490         }
4491
4492         node = path.nodes[0];
4493         slot = path.slots[0];
4494
4495         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4496         total = btrfs_item_size_nr(node, slot);
4497
4498         /* Iterate all entry of INODE_REF */
4499         while (cur < total) {
4500                 ret = INODE_REF_MISSING;
4501
4502                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4503                 ref_index = btrfs_inode_ref_index(node, ref);
4504                 if (index != (u64)-1 && index != ref_index)
4505                         goto next_ref;
4506
4507                 if (cur + sizeof(*ref) + ref_namelen > total ||
4508                     ref_namelen > BTRFS_NAME_LEN) {
4509                         warning("root %llu INODE %s[%llu %llu] name too long",
4510                                 root->objectid,
4511                                 key->type == BTRFS_INODE_REF_KEY ?
4512                                         "REF" : "EXTREF",
4513                                 key->objectid, key->offset);
4514
4515                         if (cur + sizeof(*ref) > total)
4516                                 break;
4517                         len = min_t(u32, total - cur - sizeof(*ref),
4518                                     BTRFS_NAME_LEN);
4519                 } else {
4520                         len = ref_namelen;
4521                 }
4522
4523                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4524                                    len);
4525
4526                 if (len != namelen || strncmp(ref_namebuf, name, len))
4527                         goto next_ref;
4528
4529                 ret = 0;
4530                 goto out;
4531 next_ref:
4532                 len = sizeof(*ref) + ref_namelen;
4533                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4534                 cur += len;
4535         }
4536
4537 extref:
4538         /* Skip if not support EXTENDED_IREF feature */
4539         if (!ext_ref)
4540                 goto out;
4541
4542         btrfs_release_path(&path);
4543         btrfs_init_path(&path);
4544
4545         dir_id = key->offset;
4546         key->type = BTRFS_INODE_EXTREF_KEY;
4547         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4548
4549         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4550         if (ret) {
4551                 ret = INODE_REF_MISSING;
4552                 goto out;
4553         }
4554
4555         node = path.nodes[0];
4556         slot = path.slots[0];
4557
4558         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4559         cur = 0;
4560         total = btrfs_item_size_nr(node, slot);
4561
4562         /* Iterate all entry of INODE_EXTREF */
4563         while (cur < total) {
4564                 ret = INODE_REF_MISSING;
4565
4566                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4567                 ref_index = btrfs_inode_extref_index(node, extref);
4568                 parent = btrfs_inode_extref_parent(node, extref);
4569                 if (index != (u64)-1 && index != ref_index)
4570                         goto next_extref;
4571
4572                 if (parent != dir_id)
4573                         goto next_extref;
4574
4575                 if (ref_namelen <= BTRFS_NAME_LEN) {
4576                         len = ref_namelen;
4577                 } else {
4578                         len = BTRFS_NAME_LEN;
4579                         warning("root %llu INODE %s[%llu %llu] name too long",
4580                                 root->objectid,
4581                                 key->type == BTRFS_INODE_REF_KEY ?
4582                                         "REF" : "EXTREF",
4583                                 key->objectid, key->offset);
4584                 }
4585                 read_extent_buffer(node, ref_namebuf,
4586                                    (unsigned long)(extref + 1), len);
4587
4588                 if (len != namelen || strncmp(ref_namebuf, name, len))
4589                         goto next_extref;
4590
4591                 ret = 0;
4592                 goto out;
4593
4594 next_extref:
4595                 len = sizeof(*extref) + ref_namelen;
4596                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4597                 cur += len;
4598
4599         }
4600 out:
4601         btrfs_release_path(&path);
4602         return ret;
4603 }
4604
4605 /*
4606  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4607  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4608  *
4609  * @root:       the root of the fs/file tree
4610  * @key:        the key of the INODE_REF/INODE_EXTREF
4611  * @size:       the st_size of the INODE_ITEM
4612  * @ext_ref:    the EXTENDED_IREF feature
4613  *
4614  * Return 0 if no error occurred.
4615  */
4616 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4617                           struct extent_buffer *node, int slot, u64 *size,
4618                           unsigned int ext_ref)
4619 {
4620         struct btrfs_dir_item *di;
4621         struct btrfs_inode_item *ii;
4622         struct btrfs_path path;
4623         struct btrfs_key location;
4624         char namebuf[BTRFS_NAME_LEN] = {0};
4625         u32 total;
4626         u32 cur = 0;
4627         u32 len;
4628         u32 name_len;
4629         u32 data_len;
4630         u8 filetype;
4631         u32 mode;
4632         u64 index;
4633         int ret;
4634         int err = 0;
4635
4636         /*
4637          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4638          * ignore index check.
4639          */
4640         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4641
4642         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4643         total = btrfs_item_size_nr(node, slot);
4644
4645         while (cur < total) {
4646                 data_len = btrfs_dir_data_len(node, di);
4647                 if (data_len)
4648                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX",
4651                               key->objectid, key->offset, data_len);
4652
4653                 name_len = btrfs_dir_name_len(node, di);
4654                 if (cur + sizeof(*di) + name_len > total ||
4655                     name_len > BTRFS_NAME_LEN) {
4656                         warning("root %llu %s[%llu %llu] name too long",
4657                                 root->objectid,
4658                                 key->type == BTRFS_DIR_ITEM_KEY ?
4659                                 "DIR_ITEM" : "DIR_INDEX",
4660                                 key->objectid, key->offset);
4661
4662                         if (cur + sizeof(*di) > total)
4663                                 break;
4664                         len = min_t(u32, total - cur - sizeof(*di),
4665                                     BTRFS_NAME_LEN);
4666                 } else {
4667                         len = name_len;
4668                 }
4669                 (*size) += name_len;
4670
4671                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4672                 filetype = btrfs_dir_type(node, di);
4673
4674                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4675                     key->offset != btrfs_name_hash(namebuf, len)) {
4676                         err |= -EIO;
4677                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4678                                 root->objectid, key->objectid, key->offset,
4679                                 namebuf, len, filetype, key->offset,
4680                                 btrfs_name_hash(namebuf, len));
4681                 }
4682
4683                 btrfs_init_path(&path);
4684                 btrfs_dir_item_key_to_cpu(node, di, &location);
4685
4686                 /* Ignore related ROOT_ITEM check */
4687                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4688                         goto next;
4689
4690                 /* Check relative INODE_ITEM(existence/filetype) */
4691                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4692                 if (ret) {
4693                         err |= INODE_ITEM_MISSING;
4694                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4695                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4696                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4697                               key->offset, location.objectid, name_len,
4698                               namebuf, filetype);
4699                         goto next;
4700                 }
4701
4702                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4703                                     struct btrfs_inode_item);
4704                 mode = btrfs_inode_mode(path.nodes[0], ii);
4705
4706                 if (imode_to_type(mode) != filetype) {
4707                         err |= INODE_ITEM_MISMATCH;
4708                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4709                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4710                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4711                               key->offset, name_len, namebuf, filetype);
4712                 }
4713
4714                 /* Check relative INODE_REF/INODE_EXTREF */
4715                 location.type = BTRFS_INODE_REF_KEY;
4716                 location.offset = key->objectid;
4717                 ret = find_inode_ref(root, &location, namebuf, len,
4718                                        index, ext_ref);
4719                 err |= ret;
4720                 if (ret & INODE_REF_MISSING)
4721                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4722                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4723                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4724                               key->offset, name_len, namebuf, filetype);
4725
4726 next:
4727                 btrfs_release_path(&path);
4728                 len = sizeof(*di) + name_len + data_len;
4729                 di = (struct btrfs_dir_item *)((char *)di + len);
4730                 cur += len;
4731
4732                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4733                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4734                               root->objectid, key->objectid, key->offset);
4735                         break;
4736                 }
4737         }
4738
4739         return err;
4740 }
4741
4742 /*
4743  * Check file extent datasum/hole, update the size of the file extents,
4744  * check and update the last offset of the file extent.
4745  *
4746  * @root:       the root of fs/file tree.
4747  * @fkey:       the key of the file extent.
4748  * @nodatasum:  INODE_NODATASUM feature.
4749  * @size:       the sum of all EXTENT_DATA items size for this inode.
4750  * @end:        the offset of the last extent.
4751  *
4752  * Return 0 if no error occurred.
4753  */
4754 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4755                              struct extent_buffer *node, int slot,
4756                              unsigned int nodatasum, u64 *size, u64 *end)
4757 {
4758         struct btrfs_file_extent_item *fi;
4759         u64 disk_bytenr;
4760         u64 disk_num_bytes;
4761         u64 extent_num_bytes;
4762         u64 extent_offset;
4763         u64 csum_found;         /* In byte size, sectorsize aligned */
4764         u64 search_start;       /* Logical range start we search for csum */
4765         u64 search_len;         /* Logical range len we search for csum */
4766         unsigned int extent_type;
4767         unsigned int is_hole;
4768         int compressed = 0;
4769         int ret;
4770         int err = 0;
4771
4772         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4773
4774         /* Check inline extent */
4775         extent_type = btrfs_file_extent_type(node, fi);
4776         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4777                 struct btrfs_item *e = btrfs_item_nr(slot);
4778                 u32 item_inline_len;
4779
4780                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4781                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4782                 compressed = btrfs_file_extent_compression(node, fi);
4783                 if (extent_num_bytes == 0) {
4784                         error(
4785                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4786                                 root->objectid, fkey->objectid, fkey->offset);
4787                         err |= FILE_EXTENT_ERROR;
4788                 }
4789                 if (!compressed && extent_num_bytes != item_inline_len) {
4790                         error(
4791                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4792                                 root->objectid, fkey->objectid, fkey->offset,
4793                                 extent_num_bytes, item_inline_len);
4794                         err |= FILE_EXTENT_ERROR;
4795                 }
4796                 *end += extent_num_bytes;
4797                 *size += extent_num_bytes;
4798                 return err;
4799         }
4800
4801         /* Check extent type */
4802         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4803                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4804                 err |= FILE_EXTENT_ERROR;
4805                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4806                       root->objectid, fkey->objectid, fkey->offset);
4807                 return err;
4808         }
4809
4810         /* Check REG_EXTENT/PREALLOC_EXTENT */
4811         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4812         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4813         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4814         extent_offset = btrfs_file_extent_offset(node, fi);
4815         compressed = btrfs_file_extent_compression(node, fi);
4816         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4817
4818         /*
4819          * Check EXTENT_DATA csum
4820          *
4821          * For plain (uncompressed) extent, we should only check the range
4822          * we're referring to, as it's possible that part of prealloc extent
4823          * has been written, and has csum:
4824          *
4825          * |<--- Original large preallocated extent A ---->|
4826          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4827          *      No csum                         Has csum
4828          *
4829          * For compressed extent, we should check the whole range.
4830          */
4831         if (!compressed) {
4832                 search_start = disk_bytenr + extent_offset;
4833                 search_len = extent_num_bytes;
4834         } else {
4835                 search_start = disk_bytenr;
4836                 search_len = disk_num_bytes;
4837         }
4838         ret = count_csum_range(root, search_start, search_len, &csum_found);
4839         if (csum_found > 0 && nodatasum) {
4840                 err |= ODD_CSUM_ITEM;
4841                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4842                       root->objectid, fkey->objectid, fkey->offset);
4843         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4844                    !is_hole && (ret < 0 || csum_found < search_len)) {
4845                 err |= CSUM_ITEM_MISSING;
4846                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4847                       root->objectid, fkey->objectid, fkey->offset,
4848                       csum_found, search_len);
4849         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4850                 err |= ODD_CSUM_ITEM;
4851                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4852                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4853         }
4854
4855         /* Check EXTENT_DATA hole */
4856         if (!no_holes && *end != fkey->offset) {
4857                 err |= FILE_EXTENT_ERROR;
4858                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4859                       root->objectid, fkey->objectid, fkey->offset);
4860         }
4861
4862         *end += extent_num_bytes;
4863         if (!is_hole)
4864                 *size += extent_num_bytes;
4865
4866         return err;
4867 }
4868
4869 /*
4870  * Check INODE_ITEM and related ITEMs (the same inode number)
4871  * 1. check link count
4872  * 2. check inode ref/extref
4873  * 3. check dir item/index
4874  *
4875  * @ext_ref:    the EXTENDED_IREF feature
4876  *
4877  * Return 0 if no error occurred.
4878  * Return >0 for error or hit the traversal is done(by error bitmap)
4879  */
4880 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4881                             unsigned int ext_ref)
4882 {
4883         struct extent_buffer *node;
4884         struct btrfs_inode_item *ii;
4885         struct btrfs_key key;
4886         u64 inode_id;
4887         u32 mode;
4888         u64 nlink;
4889         u64 nbytes;
4890         u64 isize;
4891         u64 size = 0;
4892         u64 refs = 0;
4893         u64 extent_end = 0;
4894         u64 extent_size = 0;
4895         unsigned int dir;
4896         unsigned int nodatasum;
4897         int slot;
4898         int ret;
4899         int err = 0;
4900
4901         node = path->nodes[0];
4902         slot = path->slots[0];
4903
4904         btrfs_item_key_to_cpu(node, &key, slot);
4905         inode_id = key.objectid;
4906
4907         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4908                 ret = btrfs_next_item(root, path);
4909                 if (ret > 0)
4910                         err |= LAST_ITEM;
4911                 return err;
4912         }
4913
4914         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4915         isize = btrfs_inode_size(node, ii);
4916         nbytes = btrfs_inode_nbytes(node, ii);
4917         mode = btrfs_inode_mode(node, ii);
4918         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4919         nlink = btrfs_inode_nlink(node, ii);
4920         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4921
4922         while (1) {
4923                 ret = btrfs_next_item(root, path);
4924                 if (ret < 0) {
4925                         /* out will fill 'err' rusing current statistics */
4926                         goto out;
4927                 } else if (ret > 0) {
4928                         err |= LAST_ITEM;
4929                         goto out;
4930                 }
4931
4932                 node = path->nodes[0];
4933                 slot = path->slots[0];
4934                 btrfs_item_key_to_cpu(node, &key, slot);
4935                 if (key.objectid != inode_id)
4936                         goto out;
4937
4938                 switch (key.type) {
4939                 case BTRFS_INODE_REF_KEY:
4940                         ret = check_inode_ref(root, &key, node, slot, &refs,
4941                                               mode);
4942                         err |= ret;
4943                         break;
4944                 case BTRFS_INODE_EXTREF_KEY:
4945                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4946                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4947                                         root->objectid, key.objectid,
4948                                         key.offset);
4949                         ret = check_inode_extref(root, &key, node, slot, &refs,
4950                                                  mode);
4951                         err |= ret;
4952                         break;
4953                 case BTRFS_DIR_ITEM_KEY:
4954                 case BTRFS_DIR_INDEX_KEY:
4955                         if (!dir) {
4956                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4957                                         root->objectid, inode_id,
4958                                         imode_to_type(mode), key.objectid,
4959                                         key.offset);
4960                         }
4961                         ret = check_dir_item(root, &key, node, slot, &size,
4962                                              ext_ref);
4963                         err |= ret;
4964                         break;
4965                 case BTRFS_EXTENT_DATA_KEY:
4966                         if (dir) {
4967                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4968                                         root->objectid, inode_id, key.objectid,
4969                                         key.offset);
4970                         }
4971                         ret = check_file_extent(root, &key, node, slot,
4972                                                 nodatasum, &extent_size,
4973                                                 &extent_end);
4974                         err |= ret;
4975                         break;
4976                 case BTRFS_XATTR_ITEM_KEY:
4977                         break;
4978                 default:
4979                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4980                               key.objectid, key.type, key.offset);
4981                 }
4982         }
4983
4984 out:
4985         /* verify INODE_ITEM nlink/isize/nbytes */
4986         if (dir) {
4987                 if (nlink != 1) {
4988                         err |= LINK_COUNT_ERROR;
4989                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4990                               root->objectid, inode_id, nlink);
4991                 }
4992
4993                 /*
4994                  * Just a warning, as dir inode nbytes is just an
4995                  * instructive value.
4996                  */
4997                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4998                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4999                                 root->objectid, inode_id,
5000                                 root->fs_info->nodesize);
5001                 }
5002
5003                 if (isize != size) {
5004                         err |= ISIZE_ERROR;
5005                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5006                               root->objectid, inode_id, isize, size);
5007                 }
5008         } else {
5009                 if (nlink != refs) {
5010                         err |= LINK_COUNT_ERROR;
5011                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5012                               root->objectid, inode_id, nlink, refs);
5013                 } else if (!nlink) {
5014                         err |= ORPHAN_ITEM;
5015                 }
5016
5017                 if (!nbytes && !no_holes && extent_end < isize) {
5018                         err |= NBYTES_ERROR;
5019                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5020                               root->objectid, inode_id, isize);
5021                 }
5022
5023                 if (nbytes != extent_size) {
5024                         err |= NBYTES_ERROR;
5025                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5026                               root->objectid, inode_id, nbytes, extent_size);
5027                 }
5028         }
5029
5030         return err;
5031 }
5032
5033 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5034 {
5035         struct btrfs_path path;
5036         struct btrfs_key key;
5037         int err = 0;
5038         int ret;
5039
5040         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5041         key.type = BTRFS_INODE_ITEM_KEY;
5042         key.offset = 0;
5043
5044         /* For root being dropped, we don't need to check first inode */
5045         if (btrfs_root_refs(&root->root_item) == 0 &&
5046             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5047             key.objectid)
5048                 return 0;
5049
5050         btrfs_init_path(&path);
5051
5052         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5053         if (ret < 0)
5054                 goto out;
5055         if (ret > 0) {
5056                 ret = 0;
5057                 err |= INODE_ITEM_MISSING;
5058                 error("first inode item of root %llu is missing",
5059                       root->objectid);
5060         }
5061
5062         err |= check_inode_item(root, &path, ext_ref);
5063         err &= ~LAST_ITEM;
5064         if (err && !ret)
5065                 ret = -EIO;
5066 out:
5067         btrfs_release_path(&path);
5068         return ret;
5069 }
5070
5071 /*
5072  * Iterate all item on the tree and call check_inode_item() to check.
5073  *
5074  * @root:       the root of the tree to be checked.
5075  * @ext_ref:    the EXTENDED_IREF feature
5076  *
5077  * Return 0 if no error found.
5078  * Return <0 for error.
5079  */
5080 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5081 {
5082         struct btrfs_path path;
5083         struct node_refs nrefs;
5084         struct btrfs_root_item *root_item = &root->root_item;
5085         int ret;
5086         int level;
5087         int err = 0;
5088
5089         /*
5090          * We need to manually check the first inode item(256)
5091          * As the following traversal function will only start from
5092          * the first inode item in the leaf, if inode item(256) is missing
5093          * we will just skip it forever.
5094          */
5095         ret = check_fs_first_inode(root, ext_ref);
5096         if (ret < 0)
5097                 return ret;
5098
5099         memset(&nrefs, 0, sizeof(nrefs));
5100         level = btrfs_header_level(root->node);
5101         btrfs_init_path(&path);
5102
5103         if (btrfs_root_refs(root_item) > 0 ||
5104             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5105                 path.nodes[level] = root->node;
5106                 path.slots[level] = 0;
5107                 extent_buffer_get(root->node);
5108         } else {
5109                 struct btrfs_key key;
5110
5111                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5112                 level = root_item->drop_level;
5113                 path.lowest_level = level;
5114                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5115                 if (ret < 0)
5116                         goto out;
5117                 ret = 0;
5118         }
5119
5120         while (1) {
5121                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5122                 err |= !!ret;
5123
5124                 /* if ret is negative, walk shall stop */
5125                 if (ret < 0) {
5126                         ret = err;
5127                         break;
5128                 }
5129
5130                 ret = walk_up_tree_v2(root, &path, &level);
5131                 if (ret != 0) {
5132                         /* Normal exit, reset ret to err */
5133                         ret = err;
5134                         break;
5135                 }
5136         }
5137
5138 out:
5139         btrfs_release_path(&path);
5140         return ret;
5141 }
5142
5143 /*
5144  * Find the relative ref for root_ref and root_backref.
5145  *
5146  * @root:       the root of the root tree.
5147  * @ref_key:    the key of the root ref.
5148  *
5149  * Return 0 if no error occurred.
5150  */
5151 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5152                           struct extent_buffer *node, int slot)
5153 {
5154         struct btrfs_path path;
5155         struct btrfs_key key;
5156         struct btrfs_root_ref *ref;
5157         struct btrfs_root_ref *backref;
5158         char ref_name[BTRFS_NAME_LEN] = {0};
5159         char backref_name[BTRFS_NAME_LEN] = {0};
5160         u64 ref_dirid;
5161         u64 ref_seq;
5162         u32 ref_namelen;
5163         u64 backref_dirid;
5164         u64 backref_seq;
5165         u32 backref_namelen;
5166         u32 len;
5167         int ret;
5168         int err = 0;
5169
5170         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5171         ref_dirid = btrfs_root_ref_dirid(node, ref);
5172         ref_seq = btrfs_root_ref_sequence(node, ref);
5173         ref_namelen = btrfs_root_ref_name_len(node, ref);
5174
5175         if (ref_namelen <= BTRFS_NAME_LEN) {
5176                 len = ref_namelen;
5177         } else {
5178                 len = BTRFS_NAME_LEN;
5179                 warning("%s[%llu %llu] ref_name too long",
5180                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5181                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5182                         ref_key->offset);
5183         }
5184         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5185
5186         /* Find relative root_ref */
5187         key.objectid = ref_key->offset;
5188         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5189         key.offset = ref_key->objectid;
5190
5191         btrfs_init_path(&path);
5192         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5193         if (ret) {
5194                 err |= ROOT_REF_MISSING;
5195                 error("%s[%llu %llu] couldn't find relative ref",
5196                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5197                       "ROOT_REF" : "ROOT_BACKREF",
5198                       ref_key->objectid, ref_key->offset);
5199                 goto out;
5200         }
5201
5202         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5203                                  struct btrfs_root_ref);
5204         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5205         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5206         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5207
5208         if (backref_namelen <= BTRFS_NAME_LEN) {
5209                 len = backref_namelen;
5210         } else {
5211                 len = BTRFS_NAME_LEN;
5212                 warning("%s[%llu %llu] ref_name too long",
5213                         key.type == BTRFS_ROOT_REF_KEY ?
5214                         "ROOT_REF" : "ROOT_BACKREF",
5215                         key.objectid, key.offset);
5216         }
5217         read_extent_buffer(path.nodes[0], backref_name,
5218                            (unsigned long)(backref + 1), len);
5219
5220         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5221             ref_namelen != backref_namelen ||
5222             strncmp(ref_name, backref_name, len)) {
5223                 err |= ROOT_REF_MISMATCH;
5224                 error("%s[%llu %llu] mismatch relative ref",
5225                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5226                       "ROOT_REF" : "ROOT_BACKREF",
5227                       ref_key->objectid, ref_key->offset);
5228         }
5229 out:
5230         btrfs_release_path(&path);
5231         return err;
5232 }
5233
5234 /*
5235  * Check all fs/file tree in low_memory mode.
5236  *
5237  * 1. for fs tree root item, call check_fs_root_v2()
5238  * 2. for fs tree root ref/backref, call check_root_ref()
5239  *
5240  * Return 0 if no error occurred.
5241  */
5242 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5243 {
5244         struct btrfs_root *tree_root = fs_info->tree_root;
5245         struct btrfs_root *cur_root = NULL;
5246         struct btrfs_path path;
5247         struct btrfs_key key;
5248         struct extent_buffer *node;
5249         unsigned int ext_ref;
5250         int slot;
5251         int ret;
5252         int err = 0;
5253
5254         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5255
5256         btrfs_init_path(&path);
5257         key.objectid = BTRFS_FS_TREE_OBJECTID;
5258         key.offset = 0;
5259         key.type = BTRFS_ROOT_ITEM_KEY;
5260
5261         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5262         if (ret < 0) {
5263                 err = ret;
5264                 goto out;
5265         } else if (ret > 0) {
5266                 err = -ENOENT;
5267                 goto out;
5268         }
5269
5270         while (1) {
5271                 node = path.nodes[0];
5272                 slot = path.slots[0];
5273                 btrfs_item_key_to_cpu(node, &key, slot);
5274                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5275                         goto out;
5276                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5277                     fs_root_objectid(key.objectid)) {
5278                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5279                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5280                                                                        &key);
5281                         } else {
5282                                 key.offset = (u64)-1;
5283                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5284                         }
5285
5286                         if (IS_ERR(cur_root)) {
5287                                 error("Fail to read fs/subvol tree: %lld",
5288                                       key.objectid);
5289                                 err = -EIO;
5290                                 goto next;
5291                         }
5292
5293                         ret = check_fs_root_v2(cur_root, ext_ref);
5294                         err |= ret;
5295
5296                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5297                                 btrfs_free_fs_root(cur_root);
5298                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5299                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5300                         ret = check_root_ref(tree_root, &key, node, slot);
5301                         err |= ret;
5302                 }
5303 next:
5304                 ret = btrfs_next_item(tree_root, &path);
5305                 if (ret > 0)
5306                         goto out;
5307                 if (ret < 0) {
5308                         err = ret;
5309                         goto out;
5310                 }
5311         }
5312
5313 out:
5314         btrfs_release_path(&path);
5315         return err;
5316 }
5317
5318 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5319 {
5320         struct list_head *cur = rec->backrefs.next;
5321         struct extent_backref *back;
5322         struct tree_backref *tback;
5323         struct data_backref *dback;
5324         u64 found = 0;
5325         int err = 0;
5326
5327         while(cur != &rec->backrefs) {
5328                 back = to_extent_backref(cur);
5329                 cur = cur->next;
5330                 if (!back->found_extent_tree) {
5331                         err = 1;
5332                         if (!print_errs)
5333                                 goto out;
5334                         if (back->is_data) {
5335                                 dback = to_data_backref(back);
5336                                 fprintf(stderr, "Backref %llu %s %llu"
5337                                         " owner %llu offset %llu num_refs %lu"
5338                                         " not found in extent tree\n",
5339                                         (unsigned long long)rec->start,
5340                                         back->full_backref ?
5341                                         "parent" : "root",
5342                                         back->full_backref ?
5343                                         (unsigned long long)dback->parent:
5344                                         (unsigned long long)dback->root,
5345                                         (unsigned long long)dback->owner,
5346                                         (unsigned long long)dback->offset,
5347                                         (unsigned long)dback->num_refs);
5348                         } else {
5349                                 tback = to_tree_backref(back);
5350                                 fprintf(stderr, "Backref %llu parent %llu"
5351                                         " root %llu not found in extent tree\n",
5352                                         (unsigned long long)rec->start,
5353                                         (unsigned long long)tback->parent,
5354                                         (unsigned long long)tback->root);
5355                         }
5356                 }
5357                 if (!back->is_data && !back->found_ref) {
5358                         err = 1;
5359                         if (!print_errs)
5360                                 goto out;
5361                         tback = to_tree_backref(back);
5362                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5363                                 (unsigned long long)rec->start,
5364                                 back->full_backref ? "parent" : "root",
5365                                 back->full_backref ?
5366                                 (unsigned long long)tback->parent :
5367                                 (unsigned long long)tback->root, back);
5368                 }
5369                 if (back->is_data) {
5370                         dback = to_data_backref(back);
5371                         if (dback->found_ref != dback->num_refs) {
5372                                 err = 1;
5373                                 if (!print_errs)
5374                                         goto out;
5375                                 fprintf(stderr, "Incorrect local backref count"
5376                                         " on %llu %s %llu owner %llu"
5377                                         " offset %llu found %u wanted %u back %p\n",
5378                                         (unsigned long long)rec->start,
5379                                         back->full_backref ?
5380                                         "parent" : "root",
5381                                         back->full_backref ?
5382                                         (unsigned long long)dback->parent:
5383                                         (unsigned long long)dback->root,
5384                                         (unsigned long long)dback->owner,
5385                                         (unsigned long long)dback->offset,
5386                                         dback->found_ref, dback->num_refs, back);
5387                         }
5388                         if (dback->disk_bytenr != rec->start) {
5389                                 err = 1;
5390                                 if (!print_errs)
5391                                         goto out;
5392                                 fprintf(stderr, "Backref disk bytenr does not"
5393                                         " match extent record, bytenr=%llu, "
5394                                         "ref bytenr=%llu\n",
5395                                         (unsigned long long)rec->start,
5396                                         (unsigned long long)dback->disk_bytenr);
5397                         }
5398
5399                         if (dback->bytes != rec->nr) {
5400                                 err = 1;
5401                                 if (!print_errs)
5402                                         goto out;
5403                                 fprintf(stderr, "Backref bytes do not match "
5404                                         "extent backref, bytenr=%llu, ref "
5405                                         "bytes=%llu, backref bytes=%llu\n",
5406                                         (unsigned long long)rec->start,
5407                                         (unsigned long long)rec->nr,
5408                                         (unsigned long long)dback->bytes);
5409                         }
5410                 }
5411                 if (!back->is_data) {
5412                         found += 1;
5413                 } else {
5414                         dback = to_data_backref(back);
5415                         found += dback->found_ref;
5416                 }
5417         }
5418         if (found != rec->refs) {
5419                 err = 1;
5420                 if (!print_errs)
5421                         goto out;
5422                 fprintf(stderr, "Incorrect global backref count "
5423                         "on %llu found %llu wanted %llu\n",
5424                         (unsigned long long)rec->start,
5425                         (unsigned long long)found,
5426                         (unsigned long long)rec->refs);
5427         }
5428 out:
5429         return err;
5430 }
5431
5432 static int free_all_extent_backrefs(struct extent_record *rec)
5433 {
5434         struct extent_backref *back;
5435         struct list_head *cur;
5436         while (!list_empty(&rec->backrefs)) {
5437                 cur = rec->backrefs.next;
5438                 back = to_extent_backref(cur);
5439                 list_del(cur);
5440                 free(back);
5441         }
5442         return 0;
5443 }
5444
5445 static void free_extent_record_cache(struct cache_tree *extent_cache)
5446 {
5447         struct cache_extent *cache;
5448         struct extent_record *rec;
5449
5450         while (1) {
5451                 cache = first_cache_extent(extent_cache);
5452                 if (!cache)
5453                         break;
5454                 rec = container_of(cache, struct extent_record, cache);
5455                 remove_cache_extent(extent_cache, cache);
5456                 free_all_extent_backrefs(rec);
5457                 free(rec);
5458         }
5459 }
5460
5461 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5462                                  struct extent_record *rec)
5463 {
5464         if (rec->content_checked && rec->owner_ref_checked &&
5465             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5466             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5467             !rec->bad_full_backref && !rec->crossing_stripes &&
5468             !rec->wrong_chunk_type) {
5469                 remove_cache_extent(extent_cache, &rec->cache);
5470                 free_all_extent_backrefs(rec);
5471                 list_del_init(&rec->list);
5472                 free(rec);
5473         }
5474         return 0;
5475 }
5476
5477 static int check_owner_ref(struct btrfs_root *root,
5478                             struct extent_record *rec,
5479                             struct extent_buffer *buf)
5480 {
5481         struct extent_backref *node;
5482         struct tree_backref *back;
5483         struct btrfs_root *ref_root;
5484         struct btrfs_key key;
5485         struct btrfs_path path;
5486         struct extent_buffer *parent;
5487         int level;
5488         int found = 0;
5489         int ret;
5490
5491         list_for_each_entry(node, &rec->backrefs, list) {
5492                 if (node->is_data)
5493                         continue;
5494                 if (!node->found_ref)
5495                         continue;
5496                 if (node->full_backref)
5497                         continue;
5498                 back = to_tree_backref(node);
5499                 if (btrfs_header_owner(buf) == back->root)
5500                         return 0;
5501         }
5502         BUG_ON(rec->is_root);
5503
5504         /* try to find the block by search corresponding fs tree */
5505         key.objectid = btrfs_header_owner(buf);
5506         key.type = BTRFS_ROOT_ITEM_KEY;
5507         key.offset = (u64)-1;
5508
5509         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5510         if (IS_ERR(ref_root))
5511                 return 1;
5512
5513         level = btrfs_header_level(buf);
5514         if (level == 0)
5515                 btrfs_item_key_to_cpu(buf, &key, 0);
5516         else
5517                 btrfs_node_key_to_cpu(buf, &key, 0);
5518
5519         btrfs_init_path(&path);
5520         path.lowest_level = level + 1;
5521         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5522         if (ret < 0)
5523                 return 0;
5524
5525         parent = path.nodes[level + 1];
5526         if (parent && buf->start == btrfs_node_blockptr(parent,
5527                                                         path.slots[level + 1]))
5528                 found = 1;
5529
5530         btrfs_release_path(&path);
5531         return found ? 0 : 1;
5532 }
5533
5534 static int is_extent_tree_record(struct extent_record *rec)
5535 {
5536         struct list_head *cur = rec->backrefs.next;
5537         struct extent_backref *node;
5538         struct tree_backref *back;
5539         int is_extent = 0;
5540
5541         while(cur != &rec->backrefs) {
5542                 node = to_extent_backref(cur);
5543                 cur = cur->next;
5544                 if (node->is_data)
5545                         return 0;
5546                 back = to_tree_backref(node);
5547                 if (node->full_backref)
5548                         return 0;
5549                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5550                         is_extent = 1;
5551         }
5552         return is_extent;
5553 }
5554
5555
5556 static int record_bad_block_io(struct btrfs_fs_info *info,
5557                                struct cache_tree *extent_cache,
5558                                u64 start, u64 len)
5559 {
5560         struct extent_record *rec;
5561         struct cache_extent *cache;
5562         struct btrfs_key key;
5563
5564         cache = lookup_cache_extent(extent_cache, start, len);
5565         if (!cache)
5566                 return 0;
5567
5568         rec = container_of(cache, struct extent_record, cache);
5569         if (!is_extent_tree_record(rec))
5570                 return 0;
5571
5572         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5573         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5574 }
5575
5576 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5577                        struct extent_buffer *buf, int slot)
5578 {
5579         if (btrfs_header_level(buf)) {
5580                 struct btrfs_key_ptr ptr1, ptr2;
5581
5582                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5583                                    sizeof(struct btrfs_key_ptr));
5584                 read_extent_buffer(buf, &ptr2,
5585                                    btrfs_node_key_ptr_offset(slot + 1),
5586                                    sizeof(struct btrfs_key_ptr));
5587                 write_extent_buffer(buf, &ptr1,
5588                                     btrfs_node_key_ptr_offset(slot + 1),
5589                                     sizeof(struct btrfs_key_ptr));
5590                 write_extent_buffer(buf, &ptr2,
5591                                     btrfs_node_key_ptr_offset(slot),
5592                                     sizeof(struct btrfs_key_ptr));
5593                 if (slot == 0) {
5594                         struct btrfs_disk_key key;
5595                         btrfs_node_key(buf, &key, 0);
5596                         btrfs_fixup_low_keys(root, path, &key,
5597                                              btrfs_header_level(buf) + 1);
5598                 }
5599         } else {
5600                 struct btrfs_item *item1, *item2;
5601                 struct btrfs_key k1, k2;
5602                 char *item1_data, *item2_data;
5603                 u32 item1_offset, item2_offset, item1_size, item2_size;
5604
5605                 item1 = btrfs_item_nr(slot);
5606                 item2 = btrfs_item_nr(slot + 1);
5607                 btrfs_item_key_to_cpu(buf, &k1, slot);
5608                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5609                 item1_offset = btrfs_item_offset(buf, item1);
5610                 item2_offset = btrfs_item_offset(buf, item2);
5611                 item1_size = btrfs_item_size(buf, item1);
5612                 item2_size = btrfs_item_size(buf, item2);
5613
5614                 item1_data = malloc(item1_size);
5615                 if (!item1_data)
5616                         return -ENOMEM;
5617                 item2_data = malloc(item2_size);
5618                 if (!item2_data) {
5619                         free(item1_data);
5620                         return -ENOMEM;
5621                 }
5622
5623                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5624                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5625
5626                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5627                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5628                 free(item1_data);
5629                 free(item2_data);
5630
5631                 btrfs_set_item_offset(buf, item1, item2_offset);
5632                 btrfs_set_item_offset(buf, item2, item1_offset);
5633                 btrfs_set_item_size(buf, item1, item2_size);
5634                 btrfs_set_item_size(buf, item2, item1_size);
5635
5636                 path->slots[0] = slot;
5637                 btrfs_set_item_key_unsafe(root, path, &k2);
5638                 path->slots[0] = slot + 1;
5639                 btrfs_set_item_key_unsafe(root, path, &k1);
5640         }
5641         return 0;
5642 }
5643
5644 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5645 {
5646         struct extent_buffer *buf;
5647         struct btrfs_key k1, k2;
5648         int i;
5649         int level = path->lowest_level;
5650         int ret = -EIO;
5651
5652         buf = path->nodes[level];
5653         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5654                 if (level) {
5655                         btrfs_node_key_to_cpu(buf, &k1, i);
5656                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5657                 } else {
5658                         btrfs_item_key_to_cpu(buf, &k1, i);
5659                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5660                 }
5661                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5662                         continue;
5663                 ret = swap_values(root, path, buf, i);
5664                 if (ret)
5665                         break;
5666                 btrfs_mark_buffer_dirty(buf);
5667                 i = 0;
5668         }
5669         return ret;
5670 }
5671
5672 static int delete_bogus_item(struct btrfs_root *root,
5673                              struct btrfs_path *path,
5674                              struct extent_buffer *buf, int slot)
5675 {
5676         struct btrfs_key key;
5677         int nritems = btrfs_header_nritems(buf);
5678
5679         btrfs_item_key_to_cpu(buf, &key, slot);
5680
5681         /* These are all the keys we can deal with missing. */
5682         if (key.type != BTRFS_DIR_INDEX_KEY &&
5683             key.type != BTRFS_EXTENT_ITEM_KEY &&
5684             key.type != BTRFS_METADATA_ITEM_KEY &&
5685             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5686             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5687                 return -1;
5688
5689         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5690                (unsigned long long)key.objectid, key.type,
5691                (unsigned long long)key.offset, slot, buf->start);
5692         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5693                               btrfs_item_nr_offset(slot + 1),
5694                               sizeof(struct btrfs_item) *
5695                               (nritems - slot - 1));
5696         btrfs_set_header_nritems(buf, nritems - 1);
5697         if (slot == 0) {
5698                 struct btrfs_disk_key disk_key;
5699
5700                 btrfs_item_key(buf, &disk_key, 0);
5701                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5702         }
5703         btrfs_mark_buffer_dirty(buf);
5704         return 0;
5705 }
5706
5707 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5708 {
5709         struct extent_buffer *buf;
5710         int i;
5711         int ret = 0;
5712
5713         /* We should only get this for leaves */
5714         BUG_ON(path->lowest_level);
5715         buf = path->nodes[0];
5716 again:
5717         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5718                 unsigned int shift = 0, offset;
5719
5720                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5721                     BTRFS_LEAF_DATA_SIZE(root)) {
5722                         if (btrfs_item_end_nr(buf, i) >
5723                             BTRFS_LEAF_DATA_SIZE(root)) {
5724                                 ret = delete_bogus_item(root, path, buf, i);
5725                                 if (!ret)
5726                                         goto again;
5727                                 fprintf(stderr, "item is off the end of the "
5728                                         "leaf, can't fix\n");
5729                                 ret = -EIO;
5730                                 break;
5731                         }
5732                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5733                                 btrfs_item_end_nr(buf, i);
5734                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5735                            btrfs_item_offset_nr(buf, i - 1)) {
5736                         if (btrfs_item_end_nr(buf, i) >
5737                             btrfs_item_offset_nr(buf, i - 1)) {
5738                                 ret = delete_bogus_item(root, path, buf, i);
5739                                 if (!ret)
5740                                         goto again;
5741                                 fprintf(stderr, "items overlap, can't fix\n");
5742                                 ret = -EIO;
5743                                 break;
5744                         }
5745                         shift = btrfs_item_offset_nr(buf, i - 1) -
5746                                 btrfs_item_end_nr(buf, i);
5747                 }
5748                 if (!shift)
5749                         continue;
5750
5751                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5752                        i, shift, (unsigned long long)buf->start);
5753                 offset = btrfs_item_offset_nr(buf, i);
5754                 memmove_extent_buffer(buf,
5755                                       btrfs_leaf_data(buf) + offset + shift,
5756                                       btrfs_leaf_data(buf) + offset,
5757                                       btrfs_item_size_nr(buf, i));
5758                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5759                                       offset + shift);
5760                 btrfs_mark_buffer_dirty(buf);
5761         }
5762
5763         /*
5764          * We may have moved things, in which case we want to exit so we don't
5765          * write those changes out.  Once we have proper abort functionality in
5766          * progs this can be changed to something nicer.
5767          */
5768         BUG_ON(ret);
5769         return ret;
5770 }
5771
5772 /*
5773  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5774  * then just return -EIO.
5775  */
5776 static int try_to_fix_bad_block(struct btrfs_root *root,
5777                                 struct extent_buffer *buf,
5778                                 enum btrfs_tree_block_status status)
5779 {
5780         struct btrfs_trans_handle *trans;
5781         struct ulist *roots;
5782         struct ulist_node *node;
5783         struct btrfs_root *search_root;
5784         struct btrfs_path path;
5785         struct ulist_iterator iter;
5786         struct btrfs_key root_key, key;
5787         int ret;
5788
5789         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5790             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5791                 return -EIO;
5792
5793         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5794         if (ret)
5795                 return -EIO;
5796
5797         btrfs_init_path(&path);
5798         ULIST_ITER_INIT(&iter);
5799         while ((node = ulist_next(roots, &iter))) {
5800                 root_key.objectid = node->val;
5801                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5802                 root_key.offset = (u64)-1;
5803
5804                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5805                 if (IS_ERR(root)) {
5806                         ret = -EIO;
5807                         break;
5808                 }
5809
5810
5811                 trans = btrfs_start_transaction(search_root, 0);
5812                 if (IS_ERR(trans)) {
5813                         ret = PTR_ERR(trans);
5814                         break;
5815                 }
5816
5817                 path.lowest_level = btrfs_header_level(buf);
5818                 path.skip_check_block = 1;
5819                 if (path.lowest_level)
5820                         btrfs_node_key_to_cpu(buf, &key, 0);
5821                 else
5822                         btrfs_item_key_to_cpu(buf, &key, 0);
5823                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5824                 if (ret) {
5825                         ret = -EIO;
5826                         btrfs_commit_transaction(trans, search_root);
5827                         break;
5828                 }
5829                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5830                         ret = fix_key_order(search_root, &path);
5831                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5832                         ret = fix_item_offset(search_root, &path);
5833                 if (ret) {
5834                         btrfs_commit_transaction(trans, search_root);
5835                         break;
5836                 }
5837                 btrfs_release_path(&path);
5838                 btrfs_commit_transaction(trans, search_root);
5839         }
5840         ulist_free(roots);
5841         btrfs_release_path(&path);
5842         return ret;
5843 }
5844
5845 static int check_block(struct btrfs_root *root,
5846                        struct cache_tree *extent_cache,
5847                        struct extent_buffer *buf, u64 flags)
5848 {
5849         struct extent_record *rec;
5850         struct cache_extent *cache;
5851         struct btrfs_key key;
5852         enum btrfs_tree_block_status status;
5853         int ret = 0;
5854         int level;
5855
5856         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5857         if (!cache)
5858                 return 1;
5859         rec = container_of(cache, struct extent_record, cache);
5860         rec->generation = btrfs_header_generation(buf);
5861
5862         level = btrfs_header_level(buf);
5863         if (btrfs_header_nritems(buf) > 0) {
5864
5865                 if (level == 0)
5866                         btrfs_item_key_to_cpu(buf, &key, 0);
5867                 else
5868                         btrfs_node_key_to_cpu(buf, &key, 0);
5869
5870                 rec->info_objectid = key.objectid;
5871         }
5872         rec->info_level = level;
5873
5874         if (btrfs_is_leaf(buf))
5875                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5876         else
5877                 status = btrfs_check_node(root, &rec->parent_key, buf);
5878
5879         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5880                 if (repair)
5881                         status = try_to_fix_bad_block(root, buf, status);
5882                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5883                         ret = -EIO;
5884                         fprintf(stderr, "bad block %llu\n",
5885                                 (unsigned long long)buf->start);
5886                 } else {
5887                         /*
5888                          * Signal to callers we need to start the scan over
5889                          * again since we'll have cowed blocks.
5890                          */
5891                         ret = -EAGAIN;
5892                 }
5893         } else {
5894                 rec->content_checked = 1;
5895                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5896                         rec->owner_ref_checked = 1;
5897                 else {
5898                         ret = check_owner_ref(root, rec, buf);
5899                         if (!ret)
5900                                 rec->owner_ref_checked = 1;
5901                 }
5902         }
5903         if (!ret)
5904                 maybe_free_extent_rec(extent_cache, rec);
5905         return ret;
5906 }
5907
5908 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5909                                                 u64 parent, u64 root)
5910 {
5911         struct list_head *cur = rec->backrefs.next;
5912         struct extent_backref *node;
5913         struct tree_backref *back;
5914
5915         while(cur != &rec->backrefs) {
5916                 node = to_extent_backref(cur);
5917                 cur = cur->next;
5918                 if (node->is_data)
5919                         continue;
5920                 back = to_tree_backref(node);
5921                 if (parent > 0) {
5922                         if (!node->full_backref)
5923                                 continue;
5924                         if (parent == back->parent)
5925                                 return back;
5926                 } else {
5927                         if (node->full_backref)
5928                                 continue;
5929                         if (back->root == root)
5930                                 return back;
5931                 }
5932         }
5933         return NULL;
5934 }
5935
5936 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5937                                                 u64 parent, u64 root)
5938 {
5939         struct tree_backref *ref = malloc(sizeof(*ref));
5940
5941         if (!ref)
5942                 return NULL;
5943         memset(&ref->node, 0, sizeof(ref->node));
5944         if (parent > 0) {
5945                 ref->parent = parent;
5946                 ref->node.full_backref = 1;
5947         } else {
5948                 ref->root = root;
5949                 ref->node.full_backref = 0;
5950         }
5951         list_add_tail(&ref->node.list, &rec->backrefs);
5952
5953         return ref;
5954 }
5955
5956 static struct data_backref *find_data_backref(struct extent_record *rec,
5957                                                 u64 parent, u64 root,
5958                                                 u64 owner, u64 offset,
5959                                                 int found_ref,
5960                                                 u64 disk_bytenr, u64 bytes)
5961 {
5962         struct list_head *cur = rec->backrefs.next;
5963         struct extent_backref *node;
5964         struct data_backref *back;
5965
5966         while(cur != &rec->backrefs) {
5967                 node = to_extent_backref(cur);
5968                 cur = cur->next;
5969                 if (!node->is_data)
5970                         continue;
5971                 back = to_data_backref(node);
5972                 if (parent > 0) {
5973                         if (!node->full_backref)
5974                                 continue;
5975                         if (parent == back->parent)
5976                                 return back;
5977                 } else {
5978                         if (node->full_backref)
5979                                 continue;
5980                         if (back->root == root && back->owner == owner &&
5981                             back->offset == offset) {
5982                                 if (found_ref && node->found_ref &&
5983                                     (back->bytes != bytes ||
5984                                     back->disk_bytenr != disk_bytenr))
5985                                         continue;
5986                                 return back;
5987                         }
5988                 }
5989         }
5990         return NULL;
5991 }
5992
5993 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5994                                                 u64 parent, u64 root,
5995                                                 u64 owner, u64 offset,
5996                                                 u64 max_size)
5997 {
5998         struct data_backref *ref = malloc(sizeof(*ref));
5999
6000         if (!ref)
6001                 return NULL;
6002         memset(&ref->node, 0, sizeof(ref->node));
6003         ref->node.is_data = 1;
6004
6005         if (parent > 0) {
6006                 ref->parent = parent;
6007                 ref->owner = 0;
6008                 ref->offset = 0;
6009                 ref->node.full_backref = 1;
6010         } else {
6011                 ref->root = root;
6012                 ref->owner = owner;
6013                 ref->offset = offset;
6014                 ref->node.full_backref = 0;
6015         }
6016         ref->bytes = max_size;
6017         ref->found_ref = 0;
6018         ref->num_refs = 0;
6019         list_add_tail(&ref->node.list, &rec->backrefs);
6020         if (max_size > rec->max_size)
6021                 rec->max_size = max_size;
6022         return ref;
6023 }
6024
6025 /* Check if the type of extent matches with its chunk */
6026 static void check_extent_type(struct extent_record *rec)
6027 {
6028         struct btrfs_block_group_cache *bg_cache;
6029
6030         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6031         if (!bg_cache)
6032                 return;
6033
6034         /* data extent, check chunk directly*/
6035         if (!rec->metadata) {
6036                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6037                         rec->wrong_chunk_type = 1;
6038                 return;
6039         }
6040
6041         /* metadata extent, check the obvious case first */
6042         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6043                                  BTRFS_BLOCK_GROUP_METADATA))) {
6044                 rec->wrong_chunk_type = 1;
6045                 return;
6046         }
6047
6048         /*
6049          * Check SYSTEM extent, as it's also marked as metadata, we can only
6050          * make sure it's a SYSTEM extent by its backref
6051          */
6052         if (!list_empty(&rec->backrefs)) {
6053                 struct extent_backref *node;
6054                 struct tree_backref *tback;
6055                 u64 bg_type;
6056
6057                 node = to_extent_backref(rec->backrefs.next);
6058                 if (node->is_data) {
6059                         /* tree block shouldn't have data backref */
6060                         rec->wrong_chunk_type = 1;
6061                         return;
6062                 }
6063                 tback = container_of(node, struct tree_backref, node);
6064
6065                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6066                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6067                 else
6068                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6069                 if (!(bg_cache->flags & bg_type))
6070                         rec->wrong_chunk_type = 1;
6071         }
6072 }
6073
6074 /*
6075  * Allocate a new extent record, fill default values from @tmpl and insert int
6076  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6077  * the cache, otherwise it fails.
6078  */
6079 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6080                 struct extent_record *tmpl)
6081 {
6082         struct extent_record *rec;
6083         int ret = 0;
6084
6085         BUG_ON(tmpl->max_size == 0);
6086         rec = malloc(sizeof(*rec));
6087         if (!rec)
6088                 return -ENOMEM;
6089         rec->start = tmpl->start;
6090         rec->max_size = tmpl->max_size;
6091         rec->nr = max(tmpl->nr, tmpl->max_size);
6092         rec->found_rec = tmpl->found_rec;
6093         rec->content_checked = tmpl->content_checked;
6094         rec->owner_ref_checked = tmpl->owner_ref_checked;
6095         rec->num_duplicates = 0;
6096         rec->metadata = tmpl->metadata;
6097         rec->flag_block_full_backref = FLAG_UNSET;
6098         rec->bad_full_backref = 0;
6099         rec->crossing_stripes = 0;
6100         rec->wrong_chunk_type = 0;
6101         rec->is_root = tmpl->is_root;
6102         rec->refs = tmpl->refs;
6103         rec->extent_item_refs = tmpl->extent_item_refs;
6104         rec->parent_generation = tmpl->parent_generation;
6105         INIT_LIST_HEAD(&rec->backrefs);
6106         INIT_LIST_HEAD(&rec->dups);
6107         INIT_LIST_HEAD(&rec->list);
6108         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6109         rec->cache.start = tmpl->start;
6110         rec->cache.size = tmpl->nr;
6111         ret = insert_cache_extent(extent_cache, &rec->cache);
6112         if (ret) {
6113                 free(rec);
6114                 return ret;
6115         }
6116         bytes_used += rec->nr;
6117
6118         if (tmpl->metadata)
6119                 rec->crossing_stripes = check_crossing_stripes(global_info,
6120                                 rec->start, global_info->nodesize);
6121         check_extent_type(rec);
6122         return ret;
6123 }
6124
6125 /*
6126  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6127  * some are hints:
6128  * - refs              - if found, increase refs
6129  * - is_root           - if found, set
6130  * - content_checked   - if found, set
6131  * - owner_ref_checked - if found, set
6132  *
6133  * If not found, create a new one, initialize and insert.
6134  */
6135 static int add_extent_rec(struct cache_tree *extent_cache,
6136                 struct extent_record *tmpl)
6137 {
6138         struct extent_record *rec;
6139         struct cache_extent *cache;
6140         int ret = 0;
6141         int dup = 0;
6142
6143         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6144         if (cache) {
6145                 rec = container_of(cache, struct extent_record, cache);
6146                 if (tmpl->refs)
6147                         rec->refs++;
6148                 if (rec->nr == 1)
6149                         rec->nr = max(tmpl->nr, tmpl->max_size);
6150
6151                 /*
6152                  * We need to make sure to reset nr to whatever the extent
6153                  * record says was the real size, this way we can compare it to
6154                  * the backrefs.
6155                  */
6156                 if (tmpl->found_rec) {
6157                         if (tmpl->start != rec->start || rec->found_rec) {
6158                                 struct extent_record *tmp;
6159
6160                                 dup = 1;
6161                                 if (list_empty(&rec->list))
6162                                         list_add_tail(&rec->list,
6163                                                       &duplicate_extents);
6164
6165                                 /*
6166                                  * We have to do this song and dance in case we
6167                                  * find an extent record that falls inside of
6168                                  * our current extent record but does not have
6169                                  * the same objectid.
6170                                  */
6171                                 tmp = malloc(sizeof(*tmp));
6172                                 if (!tmp)
6173                                         return -ENOMEM;
6174                                 tmp->start = tmpl->start;
6175                                 tmp->max_size = tmpl->max_size;
6176                                 tmp->nr = tmpl->nr;
6177                                 tmp->found_rec = 1;
6178                                 tmp->metadata = tmpl->metadata;
6179                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6180                                 INIT_LIST_HEAD(&tmp->list);
6181                                 list_add_tail(&tmp->list, &rec->dups);
6182                                 rec->num_duplicates++;
6183                         } else {
6184                                 rec->nr = tmpl->nr;
6185                                 rec->found_rec = 1;
6186                         }
6187                 }
6188
6189                 if (tmpl->extent_item_refs && !dup) {
6190                         if (rec->extent_item_refs) {
6191                                 fprintf(stderr, "block %llu rec "
6192                                         "extent_item_refs %llu, passed %llu\n",
6193                                         (unsigned long long)tmpl->start,
6194                                         (unsigned long long)
6195                                                         rec->extent_item_refs,
6196                                         (unsigned long long)tmpl->extent_item_refs);
6197                         }
6198                         rec->extent_item_refs = tmpl->extent_item_refs;
6199                 }
6200                 if (tmpl->is_root)
6201                         rec->is_root = 1;
6202                 if (tmpl->content_checked)
6203                         rec->content_checked = 1;
6204                 if (tmpl->owner_ref_checked)
6205                         rec->owner_ref_checked = 1;
6206                 memcpy(&rec->parent_key, &tmpl->parent_key,
6207                                 sizeof(tmpl->parent_key));
6208                 if (tmpl->parent_generation)
6209                         rec->parent_generation = tmpl->parent_generation;
6210                 if (rec->max_size < tmpl->max_size)
6211                         rec->max_size = tmpl->max_size;
6212
6213                 /*
6214                  * A metadata extent can't cross stripe_len boundary, otherwise
6215                  * kernel scrub won't be able to handle it.
6216                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6217                  * it.
6218                  */
6219                 if (tmpl->metadata)
6220                         rec->crossing_stripes = check_crossing_stripes(
6221                                         global_info, rec->start,
6222                                         global_info->nodesize);
6223                 check_extent_type(rec);
6224                 maybe_free_extent_rec(extent_cache, rec);
6225                 return ret;
6226         }
6227
6228         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6229
6230         return ret;
6231 }
6232
6233 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6234                             u64 parent, u64 root, int found_ref)
6235 {
6236         struct extent_record *rec;
6237         struct tree_backref *back;
6238         struct cache_extent *cache;
6239         int ret;
6240
6241         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6242         if (!cache) {
6243                 struct extent_record tmpl;
6244
6245                 memset(&tmpl, 0, sizeof(tmpl));
6246                 tmpl.start = bytenr;
6247                 tmpl.nr = 1;
6248                 tmpl.metadata = 1;
6249                 tmpl.max_size = 1;
6250
6251                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6252                 if (ret)
6253                         return ret;
6254
6255                 /* really a bug in cache_extent implement now */
6256                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6257                 if (!cache)
6258                         return -ENOENT;
6259         }
6260
6261         rec = container_of(cache, struct extent_record, cache);
6262         if (rec->start != bytenr) {
6263                 /*
6264                  * Several cause, from unaligned bytenr to over lapping extents
6265                  */
6266                 return -EEXIST;
6267         }
6268
6269         back = find_tree_backref(rec, parent, root);
6270         if (!back) {
6271                 back = alloc_tree_backref(rec, parent, root);
6272                 if (!back)
6273                         return -ENOMEM;
6274         }
6275
6276         if (found_ref) {
6277                 if (back->node.found_ref) {
6278                         fprintf(stderr, "Extent back ref already exists "
6279                                 "for %llu parent %llu root %llu \n",
6280                                 (unsigned long long)bytenr,
6281                                 (unsigned long long)parent,
6282                                 (unsigned long long)root);
6283                 }
6284                 back->node.found_ref = 1;
6285         } else {
6286                 if (back->node.found_extent_tree) {
6287                         fprintf(stderr, "Extent back ref already exists "
6288                                 "for %llu parent %llu root %llu \n",
6289                                 (unsigned long long)bytenr,
6290                                 (unsigned long long)parent,
6291                                 (unsigned long long)root);
6292                 }
6293                 back->node.found_extent_tree = 1;
6294         }
6295         check_extent_type(rec);
6296         maybe_free_extent_rec(extent_cache, rec);
6297         return 0;
6298 }
6299
6300 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6301                             u64 parent, u64 root, u64 owner, u64 offset,
6302                             u32 num_refs, int found_ref, u64 max_size)
6303 {
6304         struct extent_record *rec;
6305         struct data_backref *back;
6306         struct cache_extent *cache;
6307         int ret;
6308
6309         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6310         if (!cache) {
6311                 struct extent_record tmpl;
6312
6313                 memset(&tmpl, 0, sizeof(tmpl));
6314                 tmpl.start = bytenr;
6315                 tmpl.nr = 1;
6316                 tmpl.max_size = max_size;
6317
6318                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6319                 if (ret)
6320                         return ret;
6321
6322                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6323                 if (!cache)
6324                         abort();
6325         }
6326
6327         rec = container_of(cache, struct extent_record, cache);
6328         if (rec->max_size < max_size)
6329                 rec->max_size = max_size;
6330
6331         /*
6332          * If found_ref is set then max_size is the real size and must match the
6333          * existing refs.  So if we have already found a ref then we need to
6334          * make sure that this ref matches the existing one, otherwise we need
6335          * to add a new backref so we can notice that the backrefs don't match
6336          * and we need to figure out who is telling the truth.  This is to
6337          * account for that awful fsync bug I introduced where we'd end up with
6338          * a btrfs_file_extent_item that would have its length include multiple
6339          * prealloc extents or point inside of a prealloc extent.
6340          */
6341         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6342                                  bytenr, max_size);
6343         if (!back) {
6344                 back = alloc_data_backref(rec, parent, root, owner, offset,
6345                                           max_size);
6346                 BUG_ON(!back);
6347         }
6348
6349         if (found_ref) {
6350                 BUG_ON(num_refs != 1);
6351                 if (back->node.found_ref)
6352                         BUG_ON(back->bytes != max_size);
6353                 back->node.found_ref = 1;
6354                 back->found_ref += 1;
6355                 back->bytes = max_size;
6356                 back->disk_bytenr = bytenr;
6357                 rec->refs += 1;
6358                 rec->content_checked = 1;
6359                 rec->owner_ref_checked = 1;
6360         } else {
6361                 if (back->node.found_extent_tree) {
6362                         fprintf(stderr, "Extent back ref already exists "
6363                                 "for %llu parent %llu root %llu "
6364                                 "owner %llu offset %llu num_refs %lu\n",
6365                                 (unsigned long long)bytenr,
6366                                 (unsigned long long)parent,
6367                                 (unsigned long long)root,
6368                                 (unsigned long long)owner,
6369                                 (unsigned long long)offset,
6370                                 (unsigned long)num_refs);
6371                 }
6372                 back->num_refs = num_refs;
6373                 back->node.found_extent_tree = 1;
6374         }
6375         maybe_free_extent_rec(extent_cache, rec);
6376         return 0;
6377 }
6378
6379 static int add_pending(struct cache_tree *pending,
6380                        struct cache_tree *seen, u64 bytenr, u32 size)
6381 {
6382         int ret;
6383         ret = add_cache_extent(seen, bytenr, size);
6384         if (ret)
6385                 return ret;
6386         add_cache_extent(pending, bytenr, size);
6387         return 0;
6388 }
6389
6390 static int pick_next_pending(struct cache_tree *pending,
6391                         struct cache_tree *reada,
6392                         struct cache_tree *nodes,
6393                         u64 last, struct block_info *bits, int bits_nr,
6394                         int *reada_bits)
6395 {
6396         unsigned long node_start = last;
6397         struct cache_extent *cache;
6398         int ret;
6399
6400         cache = search_cache_extent(reada, 0);
6401         if (cache) {
6402                 bits[0].start = cache->start;
6403                 bits[0].size = cache->size;
6404                 *reada_bits = 1;
6405                 return 1;
6406         }
6407         *reada_bits = 0;
6408         if (node_start > 32768)
6409                 node_start -= 32768;
6410
6411         cache = search_cache_extent(nodes, node_start);
6412         if (!cache)
6413                 cache = search_cache_extent(nodes, 0);
6414
6415         if (!cache) {
6416                  cache = search_cache_extent(pending, 0);
6417                  if (!cache)
6418                          return 0;
6419                  ret = 0;
6420                  do {
6421                          bits[ret].start = cache->start;
6422                          bits[ret].size = cache->size;
6423                          cache = next_cache_extent(cache);
6424                          ret++;
6425                  } while (cache && ret < bits_nr);
6426                  return ret;
6427         }
6428
6429         ret = 0;
6430         do {
6431                 bits[ret].start = cache->start;
6432                 bits[ret].size = cache->size;
6433                 cache = next_cache_extent(cache);
6434                 ret++;
6435         } while (cache && ret < bits_nr);
6436
6437         if (bits_nr - ret > 8) {
6438                 u64 lookup = bits[0].start + bits[0].size;
6439                 struct cache_extent *next;
6440                 next = search_cache_extent(pending, lookup);
6441                 while(next) {
6442                         if (next->start - lookup > 32768)
6443                                 break;
6444                         bits[ret].start = next->start;
6445                         bits[ret].size = next->size;
6446                         lookup = next->start + next->size;
6447                         ret++;
6448                         if (ret == bits_nr)
6449                                 break;
6450                         next = next_cache_extent(next);
6451                         if (!next)
6452                                 break;
6453                 }
6454         }
6455         return ret;
6456 }
6457
6458 static void free_chunk_record(struct cache_extent *cache)
6459 {
6460         struct chunk_record *rec;
6461
6462         rec = container_of(cache, struct chunk_record, cache);
6463         list_del_init(&rec->list);
6464         list_del_init(&rec->dextents);
6465         free(rec);
6466 }
6467
6468 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6469 {
6470         cache_tree_free_extents(chunk_cache, free_chunk_record);
6471 }
6472
6473 static void free_device_record(struct rb_node *node)
6474 {
6475         struct device_record *rec;
6476
6477         rec = container_of(node, struct device_record, node);
6478         free(rec);
6479 }
6480
6481 FREE_RB_BASED_TREE(device_cache, free_device_record);
6482
6483 int insert_block_group_record(struct block_group_tree *tree,
6484                               struct block_group_record *bg_rec)
6485 {
6486         int ret;
6487
6488         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6489         if (ret)
6490                 return ret;
6491
6492         list_add_tail(&bg_rec->list, &tree->block_groups);
6493         return 0;
6494 }
6495
6496 static void free_block_group_record(struct cache_extent *cache)
6497 {
6498         struct block_group_record *rec;
6499
6500         rec = container_of(cache, struct block_group_record, cache);
6501         list_del_init(&rec->list);
6502         free(rec);
6503 }
6504
6505 void free_block_group_tree(struct block_group_tree *tree)
6506 {
6507         cache_tree_free_extents(&tree->tree, free_block_group_record);
6508 }
6509
6510 int insert_device_extent_record(struct device_extent_tree *tree,
6511                                 struct device_extent_record *de_rec)
6512 {
6513         int ret;
6514
6515         /*
6516          * Device extent is a bit different from the other extents, because
6517          * the extents which belong to the different devices may have the
6518          * same start and size, so we need use the special extent cache
6519          * search/insert functions.
6520          */
6521         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6522         if (ret)
6523                 return ret;
6524
6525         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6526         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6527         return 0;
6528 }
6529
6530 static void free_device_extent_record(struct cache_extent *cache)
6531 {
6532         struct device_extent_record *rec;
6533
6534         rec = container_of(cache, struct device_extent_record, cache);
6535         if (!list_empty(&rec->chunk_list))
6536                 list_del_init(&rec->chunk_list);
6537         if (!list_empty(&rec->device_list))
6538                 list_del_init(&rec->device_list);
6539         free(rec);
6540 }
6541
6542 void free_device_extent_tree(struct device_extent_tree *tree)
6543 {
6544         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6545 }
6546
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Process a v0 extent ref item found in @slot of @leaf.
 *
 * The item key supplies the extent bytenr (objectid) and the parent (offset).
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as tree
 * backrefs, all others as data backrefs carrying the item's ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 *
 * NOTE(review): the data path passes max_size 0; if no record is cached for
 * the bytenr yet, add_data_backref() builds a template with max_size 0, which
 * add_extent_rec_nolookup() rejects with BUG_ON.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
6567
6568 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6569                                             struct btrfs_key *key,
6570                                             int slot)
6571 {
6572         struct btrfs_chunk *ptr;
6573         struct chunk_record *rec;
6574         int num_stripes, i;
6575
6576         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6577         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6578
6579         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6580         if (!rec) {
6581                 fprintf(stderr, "memory allocation failed\n");
6582                 exit(-1);
6583         }
6584
6585         INIT_LIST_HEAD(&rec->list);
6586         INIT_LIST_HEAD(&rec->dextents);
6587         rec->bg_rec = NULL;
6588
6589         rec->cache.start = key->offset;
6590         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6591
6592         rec->generation = btrfs_header_generation(leaf);
6593
6594         rec->objectid = key->objectid;
6595         rec->type = key->type;
6596         rec->offset = key->offset;
6597
6598         rec->length = rec->cache.size;
6599         rec->owner = btrfs_chunk_owner(leaf, ptr);
6600         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6601         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6602         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6603         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6604         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6605         rec->num_stripes = num_stripes;
6606         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6607
6608         for (i = 0; i < rec->num_stripes; ++i) {
6609                 rec->stripes[i].devid =
6610                         btrfs_stripe_devid_nr(leaf, ptr, i);
6611                 rec->stripes[i].offset =
6612                         btrfs_stripe_offset_nr(leaf, ptr, i);
6613                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6614                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6615                                 BTRFS_UUID_SIZE);
6616         }
6617
6618         return rec;
6619 }
6620
6621 static int process_chunk_item(struct cache_tree *chunk_cache,
6622                               struct btrfs_key *key, struct extent_buffer *eb,
6623                               int slot)
6624 {
6625         struct chunk_record *rec;
6626         struct btrfs_chunk *chunk;
6627         int ret = 0;
6628
6629         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6630         /*
6631          * Do extra check for this chunk item,
6632          *
6633          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6634          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6635          * and owner<->key_type check.
6636          */
6637         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6638                                       key->offset);
6639         if (ret < 0) {
6640                 error("chunk(%llu, %llu) is not valid, ignore it",
6641                       key->offset, btrfs_chunk_length(eb, chunk));
6642                 return 0;
6643         }
6644         rec = btrfs_new_chunk_record(eb, key, slot);
6645         ret = insert_cache_extent(chunk_cache, &rec->cache);
6646         if (ret) {
6647                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6648                         rec->offset, rec->length);
6649                 free(rec);
6650         }
6651
6652         return ret;
6653 }
6654
6655 static int process_device_item(struct rb_root *dev_cache,
6656                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6657 {
6658         struct btrfs_dev_item *ptr;
6659         struct device_record *rec;
6660         int ret = 0;
6661
6662         ptr = btrfs_item_ptr(eb,
6663                 slot, struct btrfs_dev_item);
6664
6665         rec = malloc(sizeof(*rec));
6666         if (!rec) {
6667                 fprintf(stderr, "memory allocation failed\n");
6668                 return -ENOMEM;
6669         }
6670
6671         rec->devid = key->offset;
6672         rec->generation = btrfs_header_generation(eb);
6673
6674         rec->objectid = key->objectid;
6675         rec->type = key->type;
6676         rec->offset = key->offset;
6677
6678         rec->devid = btrfs_device_id(eb, ptr);
6679         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6680         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6681
6682         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6683         if (ret) {
6684                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6685                 free(rec);
6686         }
6687
6688         return ret;
6689 }
6690
6691 struct block_group_record *
6692 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6693                              int slot)
6694 {
6695         struct btrfs_block_group_item *ptr;
6696         struct block_group_record *rec;
6697
6698         rec = calloc(1, sizeof(*rec));
6699         if (!rec) {
6700                 fprintf(stderr, "memory allocation failed\n");
6701                 exit(-1);
6702         }
6703
6704         rec->cache.start = key->objectid;
6705         rec->cache.size = key->offset;
6706
6707         rec->generation = btrfs_header_generation(leaf);
6708
6709         rec->objectid = key->objectid;
6710         rec->type = key->type;
6711         rec->offset = key->offset;
6712
6713         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6714         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6715
6716         INIT_LIST_HEAD(&rec->list);
6717
6718         return rec;
6719 }
6720
6721 static int process_block_group_item(struct block_group_tree *block_group_cache,
6722                                     struct btrfs_key *key,
6723                                     struct extent_buffer *eb, int slot)
6724 {
6725         struct block_group_record *rec;
6726         int ret = 0;
6727
6728         rec = btrfs_new_block_group_record(eb, key, slot);
6729         ret = insert_block_group_record(block_group_cache, rec);
6730         if (ret) {
6731                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6732                         rec->objectid, rec->offset);
6733                 free(rec);
6734         }
6735
6736         return ret;
6737 }
6738
6739 struct device_extent_record *
6740 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6741                                struct btrfs_key *key, int slot)
6742 {
6743         struct device_extent_record *rec;
6744         struct btrfs_dev_extent *ptr;
6745
6746         rec = calloc(1, sizeof(*rec));
6747         if (!rec) {
6748                 fprintf(stderr, "memory allocation failed\n");
6749                 exit(-1);
6750         }
6751
6752         rec->cache.objectid = key->objectid;
6753         rec->cache.start = key->offset;
6754
6755         rec->generation = btrfs_header_generation(leaf);
6756
6757         rec->objectid = key->objectid;
6758         rec->type = key->type;
6759         rec->offset = key->offset;
6760
6761         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6762         rec->chunk_objecteid =
6763                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6764         rec->chunk_offset =
6765                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6766         rec->length = btrfs_dev_extent_length(leaf, ptr);
6767         rec->cache.size = rec->length;
6768
6769         INIT_LIST_HEAD(&rec->chunk_list);
6770         INIT_LIST_HEAD(&rec->device_list);
6771
6772         return rec;
6773 }
6774
6775 static int
6776 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6777                            struct btrfs_key *key, struct extent_buffer *eb,
6778                            int slot)
6779 {
6780         struct device_extent_record *rec;
6781         int ret;
6782
6783         rec = btrfs_new_device_extent_record(eb, key, slot);
6784         ret = insert_device_extent_record(dev_extent_cache, rec);
6785         if (ret) {
6786                 fprintf(stderr,
6787                         "Device extent[%llu, %llu, %llu] existed.\n",
6788                         rec->objectid, rec->offset, rec->length);
6789                 free(rec);
6790         }
6791
6792         return ret;
6793 }
6794
6795 static int process_extent_item(struct btrfs_root *root,
6796                                struct cache_tree *extent_cache,
6797                                struct extent_buffer *eb, int slot)
6798 {
6799         struct btrfs_extent_item *ei;
6800         struct btrfs_extent_inline_ref *iref;
6801         struct btrfs_extent_data_ref *dref;
6802         struct btrfs_shared_data_ref *sref;
6803         struct btrfs_key key;
6804         struct extent_record tmpl;
6805         unsigned long end;
6806         unsigned long ptr;
6807         int ret;
6808         int type;
6809         u32 item_size = btrfs_item_size_nr(eb, slot);
6810         u64 refs = 0;
6811         u64 offset;
6812         u64 num_bytes;
6813         int metadata = 0;
6814
6815         btrfs_item_key_to_cpu(eb, &key, slot);
6816
6817         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6818                 metadata = 1;
6819                 num_bytes = root->fs_info->nodesize;
6820         } else {
6821                 num_bytes = key.offset;
6822         }
6823
6824         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6825                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6826                       key.objectid, root->fs_info->sectorsize);
6827                 return -EIO;
6828         }
6829         if (item_size < sizeof(*ei)) {
6830 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6831                 struct btrfs_extent_item_v0 *ei0;
6832                 BUG_ON(item_size != sizeof(*ei0));
6833                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6834                 refs = btrfs_extent_refs_v0(eb, ei0);
6835 #else
6836                 BUG();
6837 #endif
6838                 memset(&tmpl, 0, sizeof(tmpl));
6839                 tmpl.start = key.objectid;
6840                 tmpl.nr = num_bytes;
6841                 tmpl.extent_item_refs = refs;
6842                 tmpl.metadata = metadata;
6843                 tmpl.found_rec = 1;
6844                 tmpl.max_size = num_bytes;
6845
6846                 return add_extent_rec(extent_cache, &tmpl);
6847         }
6848
6849         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6850         refs = btrfs_extent_refs(eb, ei);
6851         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6852                 metadata = 1;
6853         else
6854                 metadata = 0;
6855         if (metadata && num_bytes != root->fs_info->nodesize) {
6856                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6857                       num_bytes, root->fs_info->nodesize);
6858                 return -EIO;
6859         }
6860         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6861                 error("ignore invalid data extent, length %llu is not aligned to %u",
6862                       num_bytes, root->fs_info->sectorsize);
6863                 return -EIO;
6864         }
6865
6866         memset(&tmpl, 0, sizeof(tmpl));
6867         tmpl.start = key.objectid;
6868         tmpl.nr = num_bytes;
6869         tmpl.extent_item_refs = refs;
6870         tmpl.metadata = metadata;
6871         tmpl.found_rec = 1;
6872         tmpl.max_size = num_bytes;
6873         add_extent_rec(extent_cache, &tmpl);
6874
6875         ptr = (unsigned long)(ei + 1);
6876         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6877             key.type == BTRFS_EXTENT_ITEM_KEY)
6878                 ptr += sizeof(struct btrfs_tree_block_info);
6879
6880         end = (unsigned long)ei + item_size;
6881         while (ptr < end) {
6882                 iref = (struct btrfs_extent_inline_ref *)ptr;
6883                 type = btrfs_extent_inline_ref_type(eb, iref);
6884                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6885                 switch (type) {
6886                 case BTRFS_TREE_BLOCK_REF_KEY:
6887                         ret = add_tree_backref(extent_cache, key.objectid,
6888                                         0, offset, 0);
6889                         if (ret < 0)
6890                                 error(
6891                         "add_tree_backref failed (extent items tree block): %s",
6892                                       strerror(-ret));
6893                         break;
6894                 case BTRFS_SHARED_BLOCK_REF_KEY:
6895                         ret = add_tree_backref(extent_cache, key.objectid,
6896                                         offset, 0, 0);
6897                         if (ret < 0)
6898                                 error(
6899                         "add_tree_backref failed (extent items shared block): %s",
6900                                       strerror(-ret));
6901                         break;
6902                 case BTRFS_EXTENT_DATA_REF_KEY:
6903                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6904                         add_data_backref(extent_cache, key.objectid, 0,
6905                                         btrfs_extent_data_ref_root(eb, dref),
6906                                         btrfs_extent_data_ref_objectid(eb,
6907                                                                        dref),
6908                                         btrfs_extent_data_ref_offset(eb, dref),
6909                                         btrfs_extent_data_ref_count(eb, dref),
6910                                         0, num_bytes);
6911                         break;
6912                 case BTRFS_SHARED_DATA_REF_KEY:
6913                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6914                         add_data_backref(extent_cache, key.objectid, offset,
6915                                         0, 0, 0,
6916                                         btrfs_shared_data_ref_count(eb, sref),
6917                                         0, num_bytes);
6918                         break;
6919                 default:
6920                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6921                                 key.objectid, key.type, num_bytes);
6922                         goto out;
6923                 }
6924                 ptr += btrfs_extent_inline_ref_size(type);
6925         }
6926         WARN_ON(ptr > end);
6927 out:
6928         return 0;
6929 }
6930
6931 static int check_cache_range(struct btrfs_root *root,
6932                              struct btrfs_block_group_cache *cache,
6933                              u64 offset, u64 bytes)
6934 {
6935         struct btrfs_free_space *entry;
6936         u64 *logical;
6937         u64 bytenr;
6938         int stripe_len;
6939         int i, nr, ret;
6940
6941         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6942                 bytenr = btrfs_sb_offset(i);
6943                 ret = btrfs_rmap_block(root->fs_info,
6944                                        cache->key.objectid, bytenr, 0,
6945                                        &logical, &nr, &stripe_len);
6946                 if (ret)
6947                         return ret;
6948
6949                 while (nr--) {
6950                         if (logical[nr] + stripe_len <= offset)
6951                                 continue;
6952                         if (offset + bytes <= logical[nr])
6953                                 continue;
6954                         if (logical[nr] == offset) {
6955                                 if (stripe_len >= bytes) {
6956                                         free(logical);
6957                                         return 0;
6958                                 }
6959                                 bytes -= stripe_len;
6960                                 offset += stripe_len;
6961                         } else if (logical[nr] < offset) {
6962                                 if (logical[nr] + stripe_len >=
6963                                     offset + bytes) {
6964                                         free(logical);
6965                                         return 0;
6966                                 }
6967                                 bytes = (offset + bytes) -
6968                                         (logical[nr] + stripe_len);
6969                                 offset = logical[nr] + stripe_len;
6970                         } else {
6971                                 /*
6972                                  * Could be tricky, the super may land in the
6973                                  * middle of the area we're checking.  First
6974                                  * check the easiest case, it's at the end.
6975                                  */
6976                                 if (logical[nr] + stripe_len >=
6977                                     bytes + offset) {
6978                                         bytes = logical[nr] - offset;
6979                                         continue;
6980                                 }
6981
6982                                 /* Check the left side */
6983                                 ret = check_cache_range(root, cache,
6984                                                         offset,
6985                                                         logical[nr] - offset);
6986                                 if (ret) {
6987                                         free(logical);
6988                                         return ret;
6989                                 }
6990
6991                                 /* Now we continue with the right side */
6992                                 bytes = (offset + bytes) -
6993                                         (logical[nr] + stripe_len);
6994                                 offset = logical[nr] + stripe_len;
6995                         }
6996                 }
6997
6998                 free(logical);
6999         }
7000
7001         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7002         if (!entry) {
7003                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7004                         offset, offset+bytes);
7005                 return -EINVAL;
7006         }
7007
7008         if (entry->offset != offset) {
7009                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7010                         entry->offset);
7011                 return -EINVAL;
7012         }
7013
7014         if (entry->bytes != bytes) {
7015                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7016                         bytes, entry->bytes, offset);
7017                 return -EINVAL;
7018         }
7019
7020         unlink_free_space(cache->free_space_ctl, entry);
7021         free(entry);
7022         return 0;
7023 }
7024
7025 static int verify_space_cache(struct btrfs_root *root,
7026                               struct btrfs_block_group_cache *cache)
7027 {
7028         struct btrfs_path path;
7029         struct extent_buffer *leaf;
7030         struct btrfs_key key;
7031         u64 last;
7032         int ret = 0;
7033
7034         root = root->fs_info->extent_root;
7035
7036         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7037
7038         btrfs_init_path(&path);
7039         key.objectid = last;
7040         key.offset = 0;
7041         key.type = BTRFS_EXTENT_ITEM_KEY;
7042         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7043         if (ret < 0)
7044                 goto out;
7045         ret = 0;
7046         while (1) {
7047                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7048                         ret = btrfs_next_leaf(root, &path);
7049                         if (ret < 0)
7050                                 goto out;
7051                         if (ret > 0) {
7052                                 ret = 0;
7053                                 break;
7054                         }
7055                 }
7056                 leaf = path.nodes[0];
7057                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7058                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7059                         break;
7060                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7061                     key.type != BTRFS_METADATA_ITEM_KEY) {
7062                         path.slots[0]++;
7063                         continue;
7064                 }
7065
7066                 if (last == key.objectid) {
7067                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7068                                 last = key.objectid + key.offset;
7069                         else
7070                                 last = key.objectid + root->fs_info->nodesize;
7071                         path.slots[0]++;
7072                         continue;
7073                 }
7074
7075                 ret = check_cache_range(root, cache, last,
7076                                         key.objectid - last);
7077                 if (ret)
7078                         break;
7079                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7080                         last = key.objectid + key.offset;
7081                 else
7082                         last = key.objectid + root->fs_info->nodesize;
7083                 path.slots[0]++;
7084         }
7085
7086         if (last < cache->key.objectid + cache->key.offset)
7087                 ret = check_cache_range(root, cache, last,
7088                                         cache->key.objectid +
7089                                         cache->key.offset - last);
7090
7091 out:
7092         btrfs_release_path(&path);
7093
7094         if (!ret &&
7095             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7096                 fprintf(stderr, "There are still entries left in the space "
7097                         "cache\n");
7098                 ret = -EINVAL;
7099         }
7100
7101         return ret;
7102 }
7103
7104 static int check_space_cache(struct btrfs_root *root)
7105 {
7106         struct btrfs_block_group_cache *cache;
7107         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7108         int ret;
7109         int error = 0;
7110
7111         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7112             btrfs_super_generation(root->fs_info->super_copy) !=
7113             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7114                 printf("cache and super generation don't match, space cache "
7115                        "will be invalidated\n");
7116                 return 0;
7117         }
7118
7119         if (ctx.progress_enabled) {
7120                 ctx.tp = TASK_FREE_SPACE;
7121                 task_start(ctx.info);
7122         }
7123
7124         while (1) {
7125                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7126                 if (!cache)
7127                         break;
7128
7129                 start = cache->key.objectid + cache->key.offset;
7130                 if (!cache->free_space_ctl) {
7131                         if (btrfs_init_free_space_ctl(cache,
7132                                                 root->fs_info->sectorsize)) {
7133                                 ret = -ENOMEM;
7134                                 break;
7135                         }
7136                 } else {
7137                         btrfs_remove_free_space_cache(cache);
7138                 }
7139
7140                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7141                         ret = exclude_super_stripes(root, cache);
7142                         if (ret) {
7143                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7144                                         strerror(-ret));
7145                                 error++;
7146                                 continue;
7147                         }
7148                         ret = load_free_space_tree(root->fs_info, cache);
7149                         free_excluded_extents(root, cache);
7150                         if (ret < 0) {
7151                                 fprintf(stderr, "could not load free space tree: %s\n",
7152                                         strerror(-ret));
7153                                 error++;
7154                                 continue;
7155                         }
7156                         error += ret;
7157                 } else {
7158                         ret = load_free_space_cache(root->fs_info, cache);
7159                         if (!ret)
7160                                 continue;
7161                 }
7162
7163                 ret = verify_space_cache(root, cache);
7164                 if (ret) {
7165                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7166                                 cache->key.objectid);
7167                         error++;
7168                 }
7169         }
7170
7171         task_stop(ctx.info);
7172
7173         return error ? -EINVAL : 0;
7174 }
7175
7176 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7177                         u64 num_bytes, unsigned long leaf_offset,
7178                         struct extent_buffer *eb) {
7179
7180         struct btrfs_fs_info *fs_info = root->fs_info;
7181         u64 offset = 0;
7182         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7183         char *data;
7184         unsigned long csum_offset;
7185         u32 csum;
7186         u32 csum_expected;
7187         u64 read_len;
7188         u64 data_checked = 0;
7189         u64 tmp;
7190         int ret = 0;
7191         int mirror;
7192         int num_copies;
7193
7194         if (num_bytes % fs_info->sectorsize)
7195                 return -EINVAL;
7196
7197         data = malloc(num_bytes);
7198         if (!data)
7199                 return -ENOMEM;
7200
7201         while (offset < num_bytes) {
7202                 mirror = 0;
7203 again:
7204                 read_len = num_bytes - offset;
7205                 /* read as much space once a time */
7206                 ret = read_extent_data(fs_info, data + offset,
7207                                 bytenr + offset, &read_len, mirror);
7208                 if (ret)
7209                         goto out;
7210                 data_checked = 0;
7211                 /* verify every 4k data's checksum */
7212                 while (data_checked < read_len) {
7213                         csum = ~(u32)0;
7214                         tmp = offset + data_checked;
7215
7216                         csum = btrfs_csum_data((char *)data + tmp,
7217                                                csum, fs_info->sectorsize);
7218                         btrfs_csum_final(csum, (u8 *)&csum);
7219
7220                         csum_offset = leaf_offset +
7221                                  tmp / fs_info->sectorsize * csum_size;
7222                         read_extent_buffer(eb, (char *)&csum_expected,
7223                                            csum_offset, csum_size);
7224                         /* try another mirror */
7225                         if (csum != csum_expected) {
7226                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7227                                                 mirror, bytenr + tmp,
7228                                                 csum, csum_expected);
7229                                 num_copies = btrfs_num_copies(root->fs_info,
7230                                                 bytenr, num_bytes);
7231                                 if (mirror < num_copies - 1) {
7232                                         mirror += 1;
7233                                         goto again;
7234                                 }
7235                         }
7236                         data_checked += fs_info->sectorsize;
7237                 }
7238                 offset += read_len;
7239         }
7240 out:
7241         free(data);
7242         return ret;
7243 }
7244
7245 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7246                                u64 num_bytes)
7247 {
7248         struct btrfs_path path;
7249         struct extent_buffer *leaf;
7250         struct btrfs_key key;
7251         int ret;
7252
7253         btrfs_init_path(&path);
7254         key.objectid = bytenr;
7255         key.type = BTRFS_EXTENT_ITEM_KEY;
7256         key.offset = (u64)-1;
7257
7258 again:
7259         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7260                                 0, 0);
7261         if (ret < 0) {
7262                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7263                 btrfs_release_path(&path);
7264                 return ret;
7265         } else if (ret) {
7266                 if (path.slots[0] > 0) {
7267                         path.slots[0]--;
7268                 } else {
7269                         ret = btrfs_prev_leaf(root, &path);
7270                         if (ret < 0) {
7271                                 goto out;
7272                         } else if (ret > 0) {
7273                                 ret = 0;
7274                                 goto out;
7275                         }
7276                 }
7277         }
7278
7279         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7280
7281         /*
7282          * Block group items come before extent items if they have the same
7283          * bytenr, so walk back one more just in case.  Dear future traveller,
7284          * first congrats on mastering time travel.  Now if it's not too much
7285          * trouble could you go back to 2006 and tell Chris to make the
7286          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7287          * EXTENT_ITEM_KEY please?
7288          */
7289         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7290                 if (path.slots[0] > 0) {
7291                         path.slots[0]--;
7292                 } else {
7293                         ret = btrfs_prev_leaf(root, &path);
7294                         if (ret < 0) {
7295                                 goto out;
7296                         } else if (ret > 0) {
7297                                 ret = 0;
7298                                 goto out;
7299                         }
7300                 }
7301                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7302         }
7303
7304         while (num_bytes) {
7305                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7306                         ret = btrfs_next_leaf(root, &path);
7307                         if (ret < 0) {
7308                                 fprintf(stderr, "Error going to next leaf "
7309                                         "%d\n", ret);
7310                                 btrfs_release_path(&path);
7311                                 return ret;
7312                         } else if (ret) {
7313                                 break;
7314                         }
7315                 }
7316                 leaf = path.nodes[0];
7317                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7318                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7319                         path.slots[0]++;
7320                         continue;
7321                 }
7322                 if (key.objectid + key.offset < bytenr) {
7323                         path.slots[0]++;
7324                         continue;
7325                 }
7326                 if (key.objectid > bytenr + num_bytes)
7327                         break;
7328
7329                 if (key.objectid == bytenr) {
7330                         if (key.offset >= num_bytes) {
7331                                 num_bytes = 0;
7332                                 break;
7333                         }
7334                         num_bytes -= key.offset;
7335                         bytenr += key.offset;
7336                 } else if (key.objectid < bytenr) {
7337                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7338                                 num_bytes = 0;
7339                                 break;
7340                         }
7341                         num_bytes = (bytenr + num_bytes) -
7342                                 (key.objectid + key.offset);
7343                         bytenr = key.objectid + key.offset;
7344                 } else {
7345                         if (key.objectid + key.offset < bytenr + num_bytes) {
7346                                 u64 new_start = key.objectid + key.offset;
7347                                 u64 new_bytes = bytenr + num_bytes - new_start;
7348
7349                                 /*
7350                                  * Weird case, the extent is in the middle of
7351                                  * our range, we'll have to search one side
7352                                  * and then the other.  Not sure if this happens
7353                                  * in real life, but no harm in coding it up
7354                                  * anyway just in case.
7355                                  */
7356                                 btrfs_release_path(&path);
7357                                 ret = check_extent_exists(root, new_start,
7358                                                           new_bytes);
7359                                 if (ret) {
7360                                         fprintf(stderr, "Right section didn't "
7361                                                 "have a record\n");
7362                                         break;
7363                                 }
7364                                 num_bytes = key.objectid - bytenr;
7365                                 goto again;
7366                         }
7367                         num_bytes = key.objectid - bytenr;
7368                 }
7369                 path.slots[0]++;
7370         }
7371         ret = 0;
7372
7373 out:
7374         if (num_bytes && !ret) {
7375                 fprintf(stderr, "There are no extents for csum range "
7376                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7377                 ret = 1;
7378         }
7379
7380         btrfs_release_path(&path);
7381         return ret;
7382 }
7383
7384 static int check_csums(struct btrfs_root *root)
7385 {
7386         struct btrfs_path path;
7387         struct extent_buffer *leaf;
7388         struct btrfs_key key;
7389         u64 offset = 0, num_bytes = 0;
7390         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7391         int errors = 0;
7392         int ret;
7393         u64 data_len;
7394         unsigned long leaf_offset;
7395
7396         root = root->fs_info->csum_root;
7397         if (!extent_buffer_uptodate(root->node)) {
7398                 fprintf(stderr, "No valid csum tree found\n");
7399                 return -ENOENT;
7400         }
7401
7402         btrfs_init_path(&path);
7403         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7404         key.type = BTRFS_EXTENT_CSUM_KEY;
7405         key.offset = 0;
7406         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7407         if (ret < 0) {
7408                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7409                 btrfs_release_path(&path);
7410                 return ret;
7411         }
7412
7413         if (ret > 0 && path.slots[0])
7414                 path.slots[0]--;
7415         ret = 0;
7416
7417         while (1) {
7418                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7419                         ret = btrfs_next_leaf(root, &path);
7420                         if (ret < 0) {
7421                                 fprintf(stderr, "Error going to next leaf "
7422                                         "%d\n", ret);
7423                                 break;
7424                         }
7425                         if (ret)
7426                                 break;
7427                 }
7428                 leaf = path.nodes[0];
7429
7430                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7431                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7432                         path.slots[0]++;
7433                         continue;
7434                 }
7435
7436                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7437                               csum_size) * root->fs_info->sectorsize;
7438                 if (!check_data_csum)
7439                         goto skip_csum_check;
7440                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7441                 ret = check_extent_csums(root, key.offset, data_len,
7442                                          leaf_offset, leaf);
7443                 if (ret)
7444                         break;
7445 skip_csum_check:
7446                 if (!num_bytes) {
7447                         offset = key.offset;
7448                 } else if (key.offset != offset + num_bytes) {
7449                         ret = check_extent_exists(root, offset, num_bytes);
7450                         if (ret) {
7451                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7452                                         "there is no extent record\n",
7453                                         offset, offset+num_bytes);
7454                                 errors++;
7455                         }
7456                         offset = key.offset;
7457                         num_bytes = 0;
7458                 }
7459                 num_bytes += data_len;
7460                 path.slots[0]++;
7461         }
7462
7463         btrfs_release_path(&path);
7464         return errors;
7465 }
7466
7467 static int is_dropped_key(struct btrfs_key *key,
7468                           struct btrfs_key *drop_key) {
7469         if (key->objectid < drop_key->objectid)
7470                 return 1;
7471         else if (key->objectid == drop_key->objectid) {
7472                 if (key->type < drop_key->type)
7473                         return 1;
7474                 else if (key->type == drop_key->type) {
7475                         if (key->offset < drop_key->offset)
7476                                 return 1;
7477                 }
7478         }
7479         return 0;
7480 }
7481
7482 /*
7483  * Here are the rules for FULL_BACKREF.
7484  *
7485  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7486  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7487  *      FULL_BACKREF set.
7488  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7489  *    if it happened after the relocation occurred since we'll have dropped the
7490  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7491  *    have no real way to know for sure.
7492  *
7493  * We process the blocks one root at a time, and we start from the lowest root
7494  * objectid and go to the highest.  So we can just lookup the owner backref for
7495  * the record and if we don't find it then we know it doesn't exist and we have
7496  * a FULL BACKREF.
7497  *
7498  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7499  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7500  * be set or not and then we can check later once we've gathered all the refs.
7501  */
7502 static int calc_extent_flag(struct cache_tree *extent_cache,
7503                            struct extent_buffer *buf,
7504                            struct root_item_record *ri,
7505                            u64 *flags)
7506 {
7507         struct extent_record *rec;
7508         struct cache_extent *cache;
7509         struct tree_backref *tback;
7510         u64 owner = 0;
7511
7512         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7513         /* we have added this extent before */
7514         if (!cache)
7515                 return -ENOENT;
7516
7517         rec = container_of(cache, struct extent_record, cache);
7518
7519         /*
7520          * Except file/reloc tree, we can not have
7521          * FULL BACKREF MODE
7522          */
7523         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7524                 goto normal;
7525         /*
7526          * root node
7527          */
7528         if (buf->start == ri->bytenr)
7529                 goto normal;
7530
7531         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7532                 goto full_backref;
7533
7534         owner = btrfs_header_owner(buf);
7535         if (owner == ri->objectid)
7536                 goto normal;
7537
7538         tback = find_tree_backref(rec, 0, owner);
7539         if (!tback)
7540                 goto full_backref;
7541 normal:
7542         *flags = 0;
7543         if (rec->flag_block_full_backref != FLAG_UNSET &&
7544             rec->flag_block_full_backref != 0)
7545                 rec->bad_full_backref = 1;
7546         return 0;
7547 full_backref:
7548         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7549         if (rec->flag_block_full_backref != FLAG_UNSET &&
7550             rec->flag_block_full_backref != 1)
7551                 rec->bad_full_backref = 1;
7552         return 0;
7553 }
7554
7555 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7556 {
7557         fprintf(stderr, "Invalid key type(");
7558         print_key_type(stderr, 0, key_type);
7559         fprintf(stderr, ") found in root(");
7560         print_objectid(stderr, rootid, 0);
7561         fprintf(stderr, ")\n");
7562 }
7563
7564 /*
7565  * Check if the key is valid with its extent buffer.
7566  *
7567  * This is a early check in case invalid key exists in a extent buffer
7568  * This is not comprehensive yet, but should prevent wrong key/item passed
7569  * further
7570  */
7571 static int check_type_with_root(u64 rootid, u8 key_type)
7572 {
7573         switch (key_type) {
7574         /* Only valid in chunk tree */
7575         case BTRFS_DEV_ITEM_KEY:
7576         case BTRFS_CHUNK_ITEM_KEY:
7577                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7578                         goto err;
7579                 break;
7580         /* valid in csum and log tree */
7581         case BTRFS_CSUM_TREE_OBJECTID:
7582                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7583                       is_fstree(rootid)))
7584                         goto err;
7585                 break;
7586         case BTRFS_EXTENT_ITEM_KEY:
7587         case BTRFS_METADATA_ITEM_KEY:
7588         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7589                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7590                         goto err;
7591                 break;
7592         case BTRFS_ROOT_ITEM_KEY:
7593                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7594                         goto err;
7595                 break;
7596         case BTRFS_DEV_EXTENT_KEY:
7597                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7598                         goto err;
7599                 break;
7600         }
7601         return 0;
7602 err:
7603         report_mismatch_key_root(key_type, rootid);
7604         return -EINVAL;
7605 }
7606
7607 static int run_next_block(struct btrfs_root *root,
7608                           struct block_info *bits,
7609                           int bits_nr,
7610                           u64 *last,
7611                           struct cache_tree *pending,
7612                           struct cache_tree *seen,
7613                           struct cache_tree *reada,
7614                           struct cache_tree *nodes,
7615                           struct cache_tree *extent_cache,
7616                           struct cache_tree *chunk_cache,
7617                           struct rb_root *dev_cache,
7618                           struct block_group_tree *block_group_cache,
7619                           struct device_extent_tree *dev_extent_cache,
7620                           struct root_item_record *ri)
7621 {
7622         struct btrfs_fs_info *fs_info = root->fs_info;
7623         struct extent_buffer *buf;
7624         struct extent_record *rec = NULL;
7625         u64 bytenr;
7626         u32 size;
7627         u64 parent;
7628         u64 owner;
7629         u64 flags;
7630         u64 ptr;
7631         u64 gen = 0;
7632         int ret = 0;
7633         int i;
7634         int nritems;
7635         struct btrfs_key key;
7636         struct cache_extent *cache;
7637         int reada_bits;
7638
7639         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7640                                     bits_nr, &reada_bits);
7641         if (nritems == 0)
7642                 return 1;
7643
7644         if (!reada_bits) {
7645                 for(i = 0; i < nritems; i++) {
7646                         ret = add_cache_extent(reada, bits[i].start,
7647                                                bits[i].size);
7648                         if (ret == -EEXIST)
7649                                 continue;
7650
7651                         /* fixme, get the parent transid */
7652                         readahead_tree_block(fs_info, bits[i].start, 0);
7653                 }
7654         }
7655         *last = bits[0].start;
7656         bytenr = bits[0].start;
7657         size = bits[0].size;
7658
7659         cache = lookup_cache_extent(pending, bytenr, size);
7660         if (cache) {
7661                 remove_cache_extent(pending, cache);
7662                 free(cache);
7663         }
7664         cache = lookup_cache_extent(reada, bytenr, size);
7665         if (cache) {
7666                 remove_cache_extent(reada, cache);
7667                 free(cache);
7668         }
7669         cache = lookup_cache_extent(nodes, bytenr, size);
7670         if (cache) {
7671                 remove_cache_extent(nodes, cache);
7672                 free(cache);
7673         }
7674         cache = lookup_cache_extent(extent_cache, bytenr, size);
7675         if (cache) {
7676                 rec = container_of(cache, struct extent_record, cache);
7677                 gen = rec->parent_generation;
7678         }
7679
7680         /* fixme, get the real parent transid */
7681         buf = read_tree_block(root->fs_info, bytenr, gen);
7682         if (!extent_buffer_uptodate(buf)) {
7683                 record_bad_block_io(root->fs_info,
7684                                     extent_cache, bytenr, size);
7685                 goto out;
7686         }
7687
7688         nritems = btrfs_header_nritems(buf);
7689
7690         flags = 0;
7691         if (!init_extent_tree) {
7692                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7693                                        btrfs_header_level(buf), 1, NULL,
7694                                        &flags);
7695                 if (ret < 0) {
7696                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7697                         if (ret < 0) {
7698                                 fprintf(stderr, "Couldn't calc extent flags\n");
7699                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7700                         }
7701                 }
7702         } else {
7703                 flags = 0;
7704                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7705                 if (ret < 0) {
7706                         fprintf(stderr, "Couldn't calc extent flags\n");
7707                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7708                 }
7709         }
7710
7711         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7712                 if (ri != NULL &&
7713                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7714                     ri->objectid == btrfs_header_owner(buf)) {
7715                         /*
7716                          * Ok we got to this block from it's original owner and
7717                          * we have FULL_BACKREF set.  Relocation can leave
7718                          * converted blocks over so this is altogether possible,
7719                          * however it's not possible if the generation > the
7720                          * last snapshot, so check for this case.
7721                          */
7722                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7723                             btrfs_header_generation(buf) > ri->last_snapshot) {
7724                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7725                                 rec->bad_full_backref = 1;
7726                         }
7727                 }
7728         } else {
7729                 if (ri != NULL &&
7730                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7731                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7732                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7733                         rec->bad_full_backref = 1;
7734                 }
7735         }
7736
7737         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7738                 rec->flag_block_full_backref = 1;
7739                 parent = bytenr;
7740                 owner = 0;
7741         } else {
7742                 rec->flag_block_full_backref = 0;
7743                 parent = 0;
7744                 owner = btrfs_header_owner(buf);
7745         }
7746
7747         ret = check_block(root, extent_cache, buf, flags);
7748         if (ret)
7749                 goto out;
7750
7751         if (btrfs_is_leaf(buf)) {
7752                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7753                 for (i = 0; i < nritems; i++) {
7754                         struct btrfs_file_extent_item *fi;
7755                         btrfs_item_key_to_cpu(buf, &key, i);
7756                         /*
7757                          * Check key type against the leaf owner.
7758                          * Could filter quite a lot of early error if
7759                          * owner is correct
7760                          */
7761                         if (check_type_with_root(btrfs_header_owner(buf),
7762                                                  key.type)) {
7763                                 fprintf(stderr, "ignoring invalid key\n");
7764                                 continue;
7765                         }
7766                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7767                                 process_extent_item(root, extent_cache, buf,
7768                                                     i);
7769                                 continue;
7770                         }
7771                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7772                                 process_extent_item(root, extent_cache, buf,
7773                                                     i);
7774                                 continue;
7775                         }
7776                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7777                                 total_csum_bytes +=
7778                                         btrfs_item_size_nr(buf, i);
7779                                 continue;
7780                         }
7781                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7782                                 process_chunk_item(chunk_cache, &key, buf, i);
7783                                 continue;
7784                         }
7785                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7786                                 process_device_item(dev_cache, &key, buf, i);
7787                                 continue;
7788                         }
7789                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7790                                 process_block_group_item(block_group_cache,
7791                                         &key, buf, i);
7792                                 continue;
7793                         }
7794                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7795                                 process_device_extent_item(dev_extent_cache,
7796                                         &key, buf, i);
7797                                 continue;
7798
7799                         }
7800                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7801 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7802                                 process_extent_ref_v0(extent_cache, buf, i);
7803 #else
7804                                 BUG();
7805 #endif
7806                                 continue;
7807                         }
7808
7809                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7810                                 ret = add_tree_backref(extent_cache,
7811                                                 key.objectid, 0, key.offset, 0);
7812                                 if (ret < 0)
7813                                         error(
7814                                 "add_tree_backref failed (leaf tree block): %s",
7815                                               strerror(-ret));
7816                                 continue;
7817                         }
7818                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7819                                 ret = add_tree_backref(extent_cache,
7820                                                 key.objectid, key.offset, 0, 0);
7821                                 if (ret < 0)
7822                                         error(
7823                                 "add_tree_backref failed (leaf shared block): %s",
7824                                               strerror(-ret));
7825                                 continue;
7826                         }
7827                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7828                                 struct btrfs_extent_data_ref *ref;
7829                                 ref = btrfs_item_ptr(buf, i,
7830                                                 struct btrfs_extent_data_ref);
7831                                 add_data_backref(extent_cache,
7832                                         key.objectid, 0,
7833                                         btrfs_extent_data_ref_root(buf, ref),
7834                                         btrfs_extent_data_ref_objectid(buf,
7835                                                                        ref),
7836                                         btrfs_extent_data_ref_offset(buf, ref),
7837                                         btrfs_extent_data_ref_count(buf, ref),
7838                                         0, root->fs_info->sectorsize);
7839                                 continue;
7840                         }
7841                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7842                                 struct btrfs_shared_data_ref *ref;
7843                                 ref = btrfs_item_ptr(buf, i,
7844                                                 struct btrfs_shared_data_ref);
7845                                 add_data_backref(extent_cache,
7846                                         key.objectid, key.offset, 0, 0, 0,
7847                                         btrfs_shared_data_ref_count(buf, ref),
7848                                         0, root->fs_info->sectorsize);
7849                                 continue;
7850                         }
7851                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7852                                 struct bad_item *bad;
7853
7854                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7855                                         continue;
7856                                 if (!owner)
7857                                         continue;
7858                                 bad = malloc(sizeof(struct bad_item));
7859                                 if (!bad)
7860                                         continue;
7861                                 INIT_LIST_HEAD(&bad->list);
7862                                 memcpy(&bad->key, &key,
7863                                        sizeof(struct btrfs_key));
7864                                 bad->root_id = owner;
7865                                 list_add_tail(&bad->list, &delete_items);
7866                                 continue;
7867                         }
7868                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7869                                 continue;
7870                         fi = btrfs_item_ptr(buf, i,
7871                                             struct btrfs_file_extent_item);
7872                         if (btrfs_file_extent_type(buf, fi) ==
7873                             BTRFS_FILE_EXTENT_INLINE)
7874                                 continue;
7875                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7876                                 continue;
7877
7878                         data_bytes_allocated +=
7879                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7880                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7881                                 abort();
7882                         }
7883                         data_bytes_referenced +=
7884                                 btrfs_file_extent_num_bytes(buf, fi);
7885                         add_data_backref(extent_cache,
7886                                 btrfs_file_extent_disk_bytenr(buf, fi),
7887                                 parent, owner, key.objectid, key.offset -
7888                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7889                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7890                 }
7891         } else {
7892                 int level;
7893                 struct btrfs_key first_key;
7894
7895                 first_key.objectid = 0;
7896
7897                 if (nritems > 0)
7898                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7899                 level = btrfs_header_level(buf);
7900                 for (i = 0; i < nritems; i++) {
7901                         struct extent_record tmpl;
7902
7903                         ptr = btrfs_node_blockptr(buf, i);
7904                         size = root->fs_info->nodesize;
7905                         btrfs_node_key_to_cpu(buf, &key, i);
7906                         if (ri != NULL) {
7907                                 if ((level == ri->drop_level)
7908                                     && is_dropped_key(&key, &ri->drop_key)) {
7909                                         continue;
7910                                 }
7911                         }
7912
7913                         memset(&tmpl, 0, sizeof(tmpl));
7914                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7915                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7916                         tmpl.start = ptr;
7917                         tmpl.nr = size;
7918                         tmpl.refs = 1;
7919                         tmpl.metadata = 1;
7920                         tmpl.max_size = size;
7921                         ret = add_extent_rec(extent_cache, &tmpl);
7922                         if (ret < 0)
7923                                 goto out;
7924
7925                         ret = add_tree_backref(extent_cache, ptr, parent,
7926                                         owner, 1);
7927                         if (ret < 0) {
7928                                 error(
7929                                 "add_tree_backref failed (non-leaf block): %s",
7930                                       strerror(-ret));
7931                                 continue;
7932                         }
7933
7934                         if (level > 1) {
7935                                 add_pending(nodes, seen, ptr, size);
7936                         } else {
7937                                 add_pending(pending, seen, ptr, size);
7938                         }
7939                 }
7940                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7941                                       nritems) * sizeof(struct btrfs_key_ptr);
7942         }
7943         total_btree_bytes += buf->len;
7944         if (fs_root_objectid(btrfs_header_owner(buf)))
7945                 total_fs_tree_bytes += buf->len;
7946         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7947                 total_extent_tree_bytes += buf->len;
7948 out:
7949         free_extent_buffer(buf);
7950         return ret;
7951 }
7952
7953 static int add_root_to_pending(struct extent_buffer *buf,
7954                                struct cache_tree *extent_cache,
7955                                struct cache_tree *pending,
7956                                struct cache_tree *seen,
7957                                struct cache_tree *nodes,
7958                                u64 objectid)
7959 {
7960         struct extent_record tmpl;
7961         int ret;
7962
7963         if (btrfs_header_level(buf) > 0)
7964                 add_pending(nodes, seen, buf->start, buf->len);
7965         else
7966                 add_pending(pending, seen, buf->start, buf->len);
7967
7968         memset(&tmpl, 0, sizeof(tmpl));
7969         tmpl.start = buf->start;
7970         tmpl.nr = buf->len;
7971         tmpl.is_root = 1;
7972         tmpl.refs = 1;
7973         tmpl.metadata = 1;
7974         tmpl.max_size = buf->len;
7975         add_extent_rec(extent_cache, &tmpl);
7976
7977         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7978             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7979                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7980                                 0, 1);
7981         else
7982                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7983                                 1);
7984         return ret;
7985 }
7986
7987 /* as we fix the tree, we might be deleting blocks that
7988  * we're tracking for repair.  This hook makes sure we
7989  * remove any backrefs for blocks as we are fixing them.
7990  */
7991 static int free_extent_hook(struct btrfs_trans_handle *trans,
7992                             struct btrfs_root *root,
7993                             u64 bytenr, u64 num_bytes, u64 parent,
7994                             u64 root_objectid, u64 owner, u64 offset,
7995                             int refs_to_drop)
7996 {
7997         struct extent_record *rec;
7998         struct cache_extent *cache;
7999         int is_data;
8000         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8001
8002         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8003         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8004         if (!cache)
8005                 return 0;
8006
8007         rec = container_of(cache, struct extent_record, cache);
8008         if (is_data) {
8009                 struct data_backref *back;
8010                 back = find_data_backref(rec, parent, root_objectid, owner,
8011                                          offset, 1, bytenr, num_bytes);
8012                 if (!back)
8013                         goto out;
8014                 if (back->node.found_ref) {
8015                         back->found_ref -= refs_to_drop;
8016                         if (rec->refs)
8017                                 rec->refs -= refs_to_drop;
8018                 }
8019                 if (back->node.found_extent_tree) {
8020                         back->num_refs -= refs_to_drop;
8021                         if (rec->extent_item_refs)
8022                                 rec->extent_item_refs -= refs_to_drop;
8023                 }
8024                 if (back->found_ref == 0)
8025                         back->node.found_ref = 0;
8026                 if (back->num_refs == 0)
8027                         back->node.found_extent_tree = 0;
8028
8029                 if (!back->node.found_extent_tree && back->node.found_ref) {
8030                         list_del(&back->node.list);
8031                         free(back);
8032                 }
8033         } else {
8034                 struct tree_backref *back;
8035                 back = find_tree_backref(rec, parent, root_objectid);
8036                 if (!back)
8037                         goto out;
8038                 if (back->node.found_ref) {
8039                         if (rec->refs)
8040                                 rec->refs--;
8041                         back->node.found_ref = 0;
8042                 }
8043                 if (back->node.found_extent_tree) {
8044                         if (rec->extent_item_refs)
8045                                 rec->extent_item_refs--;
8046                         back->node.found_extent_tree = 0;
8047                 }
8048                 if (!back->node.found_extent_tree && back->node.found_ref) {
8049                         list_del(&back->node.list);
8050                         free(back);
8051                 }
8052         }
8053         maybe_free_extent_rec(extent_cache, rec);
8054 out:
8055         return 0;
8056 }
8057
8058 static int delete_extent_records(struct btrfs_trans_handle *trans,
8059                                  struct btrfs_root *root,
8060                                  struct btrfs_path *path,
8061                                  u64 bytenr)
8062 {
8063         struct btrfs_key key;
8064         struct btrfs_key found_key;
8065         struct extent_buffer *leaf;
8066         int ret;
8067         int slot;
8068
8069
8070         key.objectid = bytenr;
8071         key.type = (u8)-1;
8072         key.offset = (u64)-1;
8073
8074         while(1) {
8075                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8076                                         &key, path, 0, 1);
8077                 if (ret < 0)
8078                         break;
8079
8080                 if (ret > 0) {
8081                         ret = 0;
8082                         if (path->slots[0] == 0)
8083                                 break;
8084                         path->slots[0]--;
8085                 }
8086                 ret = 0;
8087
8088                 leaf = path->nodes[0];
8089                 slot = path->slots[0];
8090
8091                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8092                 if (found_key.objectid != bytenr)
8093                         break;
8094
8095                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8096                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8097                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8098                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8099                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8100                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8101                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8102                         btrfs_release_path(path);
8103                         if (found_key.type == 0) {
8104                                 if (found_key.offset == 0)
8105                                         break;
8106                                 key.offset = found_key.offset - 1;
8107                                 key.type = found_key.type;
8108                         }
8109                         key.type = found_key.type - 1;
8110                         key.offset = (u64)-1;
8111                         continue;
8112                 }
8113
8114                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8115                         found_key.objectid, found_key.type, found_key.offset);
8116
8117                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8118                 if (ret)
8119                         break;
8120                 btrfs_release_path(path);
8121
8122                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8123                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8124                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8125                                 found_key.offset : root->fs_info->nodesize;
8126
8127                         ret = btrfs_update_block_group(trans, root, bytenr,
8128                                                        bytes, 0, 0);
8129                         if (ret)
8130                                 break;
8131                 }
8132         }
8133
8134         btrfs_release_path(path);
8135         return ret;
8136 }
8137
8138 /*
8139  * for a single backref, this will allocate a new extent
8140  * and add the backref to it.
8141  */
8142 static int record_extent(struct btrfs_trans_handle *trans,
8143                          struct btrfs_fs_info *info,
8144                          struct btrfs_path *path,
8145                          struct extent_record *rec,
8146                          struct extent_backref *back,
8147                          int allocated, u64 flags)
8148 {
8149         int ret = 0;
8150         struct btrfs_root *extent_root = info->extent_root;
8151         struct extent_buffer *leaf;
8152         struct btrfs_key ins_key;
8153         struct btrfs_extent_item *ei;
8154         struct data_backref *dback;
8155         struct btrfs_tree_block_info *bi;
8156
8157         if (!back->is_data)
8158                 rec->max_size = max_t(u64, rec->max_size,
8159                                     info->nodesize);
8160
8161         if (!allocated) {
8162                 u32 item_size = sizeof(*ei);
8163
8164                 if (!back->is_data)
8165                         item_size += sizeof(*bi);
8166
8167                 ins_key.objectid = rec->start;
8168                 ins_key.offset = rec->max_size;
8169                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8170
8171                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8172                                         &ins_key, item_size);
8173                 if (ret)
8174                         goto fail;
8175
8176                 leaf = path->nodes[0];
8177                 ei = btrfs_item_ptr(leaf, path->slots[0],
8178                                     struct btrfs_extent_item);
8179
8180                 btrfs_set_extent_refs(leaf, ei, 0);
8181                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8182
8183                 if (back->is_data) {
8184                         btrfs_set_extent_flags(leaf, ei,
8185                                                BTRFS_EXTENT_FLAG_DATA);
8186                 } else {
8187                         struct btrfs_disk_key copy_key;;
8188
8189                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8190                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8191                                              sizeof(*bi));
8192
8193                         btrfs_set_disk_key_objectid(&copy_key,
8194                                                     rec->info_objectid);
8195                         btrfs_set_disk_key_type(&copy_key, 0);
8196                         btrfs_set_disk_key_offset(&copy_key, 0);
8197
8198                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8199                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8200
8201                         btrfs_set_extent_flags(leaf, ei,
8202                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8203                 }
8204
8205                 btrfs_mark_buffer_dirty(leaf);
8206                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8207                                                rec->max_size, 1, 0);
8208                 if (ret)
8209                         goto fail;
8210                 btrfs_release_path(path);
8211         }
8212
8213         if (back->is_data) {
8214                 u64 parent;
8215                 int i;
8216
8217                 dback = to_data_backref(back);
8218                 if (back->full_backref)
8219                         parent = dback->parent;
8220                 else
8221                         parent = 0;
8222
8223                 for (i = 0; i < dback->found_ref; i++) {
8224                         /* if parent != 0, we're doing a full backref
8225                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8226                          * just makes the backref allocator create a data
8227                          * backref
8228                          */
8229                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8230                                                    rec->start, rec->max_size,
8231                                                    parent,
8232                                                    dback->root,
8233                                                    parent ?
8234                                                    BTRFS_FIRST_FREE_OBJECTID :
8235                                                    dback->owner,
8236                                                    dback->offset);
8237                         if (ret)
8238                                 break;
8239                 }
8240                 fprintf(stderr, "adding new data backref"
8241                                 " on %llu %s %llu owner %llu"
8242                                 " offset %llu found %d\n",
8243                                 (unsigned long long)rec->start,
8244                                 back->full_backref ?
8245                                 "parent" : "root",
8246                                 back->full_backref ?
8247                                 (unsigned long long)parent :
8248                                 (unsigned long long)dback->root,
8249                                 (unsigned long long)dback->owner,
8250                                 (unsigned long long)dback->offset,
8251                                 dback->found_ref);
8252         } else {
8253                 u64 parent;
8254                 struct tree_backref *tback;
8255
8256                 tback = to_tree_backref(back);
8257                 if (back->full_backref)
8258                         parent = tback->parent;
8259                 else
8260                         parent = 0;
8261
8262                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8263                                            rec->start, rec->max_size,
8264                                            parent, tback->root, 0, 0);
8265                 fprintf(stderr, "adding new tree backref on "
8266                         "start %llu len %llu parent %llu root %llu\n",
8267                         rec->start, rec->max_size, parent, tback->root);
8268         }
8269 fail:
8270         btrfs_release_path(path);
8271         return ret;
8272 }
8273
8274 static struct extent_entry *find_entry(struct list_head *entries,
8275                                        u64 bytenr, u64 bytes)
8276 {
8277         struct extent_entry *entry = NULL;
8278
8279         list_for_each_entry(entry, entries, list) {
8280                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8281                         return entry;
8282         }
8283
8284         return NULL;
8285 }
8286
8287 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8288 {
8289         struct extent_entry *entry, *best = NULL, *prev = NULL;
8290
8291         list_for_each_entry(entry, entries, list) {
8292                 /*
8293                  * If there are as many broken entries as entries then we know
8294                  * not to trust this particular entry.
8295                  */
8296                 if (entry->broken == entry->count)
8297                         continue;
8298
8299                 /*
8300                  * Special case, when there are only two entries and 'best' is
8301                  * the first one
8302                  */
8303                 if (!prev) {
8304                         best = entry;
8305                         prev = entry;
8306                         continue;
8307                 }
8308
8309                 /*
8310                  * If our current entry == best then we can't be sure our best
8311                  * is really the best, so we need to keep searching.
8312                  */
8313                 if (best && best->count == entry->count) {
8314                         prev = entry;
8315                         best = NULL;
8316                         continue;
8317                 }
8318
8319                 /* Prev == entry, not good enough, have to keep searching */
8320                 if (!prev->broken && prev->count == entry->count)
8321                         continue;
8322
8323                 if (!best)
8324                         best = (prev->count > entry->count) ? prev : entry;
8325                 else if (best->count < entry->count)
8326                         best = entry;
8327                 prev = entry;
8328         }
8329
8330         return best;
8331 }
8332
8333 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8334                       struct data_backref *dback, struct extent_entry *entry)
8335 {
8336         struct btrfs_trans_handle *trans;
8337         struct btrfs_root *root;
8338         struct btrfs_file_extent_item *fi;
8339         struct extent_buffer *leaf;
8340         struct btrfs_key key;
8341         u64 bytenr, bytes;
8342         int ret, err;
8343
8344         key.objectid = dback->root;
8345         key.type = BTRFS_ROOT_ITEM_KEY;
8346         key.offset = (u64)-1;
8347         root = btrfs_read_fs_root(info, &key);
8348         if (IS_ERR(root)) {
8349                 fprintf(stderr, "Couldn't find root for our ref\n");
8350                 return -EINVAL;
8351         }
8352
8353         /*
8354          * The backref points to the original offset of the extent if it was
8355          * split, so we need to search down to the offset we have and then walk
8356          * forward until we find the backref we're looking for.
8357          */
8358         key.objectid = dback->owner;
8359         key.type = BTRFS_EXTENT_DATA_KEY;
8360         key.offset = dback->offset;
8361         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8362         if (ret < 0) {
8363                 fprintf(stderr, "Error looking up ref %d\n", ret);
8364                 return ret;
8365         }
8366
8367         while (1) {
8368                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8369                         ret = btrfs_next_leaf(root, path);
8370                         if (ret) {
8371                                 fprintf(stderr, "Couldn't find our ref, next\n");
8372                                 return -EINVAL;
8373                         }
8374                 }
8375                 leaf = path->nodes[0];
8376                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8377                 if (key.objectid != dback->owner ||
8378                     key.type != BTRFS_EXTENT_DATA_KEY) {
8379                         fprintf(stderr, "Couldn't find our ref, search\n");
8380                         return -EINVAL;
8381                 }
8382                 fi = btrfs_item_ptr(leaf, path->slots[0],
8383                                     struct btrfs_file_extent_item);
8384                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8385                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8386
8387                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8388                         break;
8389                 path->slots[0]++;
8390         }
8391
8392         btrfs_release_path(path);
8393
8394         trans = btrfs_start_transaction(root, 1);
8395         if (IS_ERR(trans))
8396                 return PTR_ERR(trans);
8397
8398         /*
8399          * Ok we have the key of the file extent we want to fix, now we can cow
8400          * down to the thing and fix it.
8401          */
8402         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8403         if (ret < 0) {
8404                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8405                         key.objectid, key.type, key.offset, ret);
8406                 goto out;
8407         }
8408         if (ret > 0) {
8409                 fprintf(stderr, "Well that's odd, we just found this key "
8410                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8411                         key.offset);
8412                 ret = -EINVAL;
8413                 goto out;
8414         }
8415         leaf = path->nodes[0];
8416         fi = btrfs_item_ptr(leaf, path->slots[0],
8417                             struct btrfs_file_extent_item);
8418
8419         if (btrfs_file_extent_compression(leaf, fi) &&
8420             dback->disk_bytenr != entry->bytenr) {
8421                 fprintf(stderr, "Ref doesn't match the record start and is "
8422                         "compressed, please take a btrfs-image of this file "
8423                         "system and send it to a btrfs developer so they can "
8424                         "complete this functionality for bytenr %Lu\n",
8425                         dback->disk_bytenr);
8426                 ret = -EINVAL;
8427                 goto out;
8428         }
8429
8430         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8431                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8432         } else if (dback->disk_bytenr > entry->bytenr) {
8433                 u64 off_diff, offset;
8434
8435                 off_diff = dback->disk_bytenr - entry->bytenr;
8436                 offset = btrfs_file_extent_offset(leaf, fi);
8437                 if (dback->disk_bytenr + offset +
8438                     btrfs_file_extent_num_bytes(leaf, fi) >
8439                     entry->bytenr + entry->bytes) {
8440                         fprintf(stderr, "Ref is past the entry end, please "
8441                                 "take a btrfs-image of this file system and "
8442                                 "send it to a btrfs developer, ref %Lu\n",
8443                                 dback->disk_bytenr);
8444                         ret = -EINVAL;
8445                         goto out;
8446                 }
8447                 offset += off_diff;
8448                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8449                 btrfs_set_file_extent_offset(leaf, fi, offset);
8450         } else if (dback->disk_bytenr < entry->bytenr) {
8451                 u64 offset;
8452
8453                 offset = btrfs_file_extent_offset(leaf, fi);
8454                 if (dback->disk_bytenr + offset < entry->bytenr) {
8455                         fprintf(stderr, "Ref is before the entry start, please"
8456                                 " take a btrfs-image of this file system and "
8457                                 "send it to a btrfs developer, ref %Lu\n",
8458                                 dback->disk_bytenr);
8459                         ret = -EINVAL;
8460                         goto out;
8461                 }
8462
8463                 offset += dback->disk_bytenr;
8464                 offset -= entry->bytenr;
8465                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8466                 btrfs_set_file_extent_offset(leaf, fi, offset);
8467         }
8468
8469         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8470
8471         /*
8472          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8473          * only do this if we aren't using compression, otherwise it's a
8474          * trickier case.
8475          */
8476         if (!btrfs_file_extent_compression(leaf, fi))
8477                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8478         else
8479                 printf("ram bytes may be wrong?\n");
8480         btrfs_mark_buffer_dirty(leaf);
8481 out:
8482         err = btrfs_commit_transaction(trans, root);
8483         btrfs_release_path(path);
8484         return ret ? ret : err;
8485 }
8486
8487 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8488                            struct extent_record *rec)
8489 {
8490         struct extent_backref *back;
8491         struct data_backref *dback;
8492         struct extent_entry *entry, *best = NULL;
8493         LIST_HEAD(entries);
8494         int nr_entries = 0;
8495         int broken_entries = 0;
8496         int ret = 0;
8497         short mismatch = 0;
8498
8499         /*
8500          * Metadata is easy and the backrefs should always agree on bytenr and
8501          * size, if not we've got bigger issues.
8502          */
8503         if (rec->metadata)
8504                 return 0;
8505
8506         list_for_each_entry(back, &rec->backrefs, list) {
8507                 if (back->full_backref || !back->is_data)
8508                         continue;
8509
8510                 dback = to_data_backref(back);
8511
8512                 /*
8513                  * We only pay attention to backrefs that we found a real
8514                  * backref for.
8515                  */
8516                 if (dback->found_ref == 0)
8517                         continue;
8518
8519                 /*
8520                  * For now we only catch when the bytes don't match, not the
8521                  * bytenr.  We can easily do this at the same time, but I want
8522                  * to have a fs image to test on before we just add repair
8523                  * functionality willy-nilly so we know we won't screw up the
8524                  * repair.
8525                  */
8526
8527                 entry = find_entry(&entries, dback->disk_bytenr,
8528                                    dback->bytes);
8529                 if (!entry) {
8530                         entry = malloc(sizeof(struct extent_entry));
8531                         if (!entry) {
8532                                 ret = -ENOMEM;
8533                                 goto out;
8534                         }
8535                         memset(entry, 0, sizeof(*entry));
8536                         entry->bytenr = dback->disk_bytenr;
8537                         entry->bytes = dback->bytes;
8538                         list_add_tail(&entry->list, &entries);
8539                         nr_entries++;
8540                 }
8541
8542                 /*
8543                  * If we only have on entry we may think the entries agree when
8544                  * in reality they don't so we have to do some extra checking.
8545                  */
8546                 if (dback->disk_bytenr != rec->start ||
8547                     dback->bytes != rec->nr || back->broken)
8548                         mismatch = 1;
8549
8550                 if (back->broken) {
8551                         entry->broken++;
8552                         broken_entries++;
8553                 }
8554
8555                 entry->count++;
8556         }
8557
8558         /* Yay all the backrefs agree, carry on good sir */
8559         if (nr_entries <= 1 && !mismatch)
8560                 goto out;
8561
8562         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8563                 "%Lu\n", rec->start);
8564
8565         /*
8566          * First we want to see if the backrefs can agree amongst themselves who
8567          * is right, so figure out which one of the entries has the highest
8568          * count.
8569          */
8570         best = find_most_right_entry(&entries);
8571
8572         /*
8573          * Ok so we may have an even split between what the backrefs think, so
8574          * this is where we use the extent ref to see what it thinks.
8575          */
8576         if (!best) {
8577                 entry = find_entry(&entries, rec->start, rec->nr);
8578                 if (!entry && (!broken_entries || !rec->found_rec)) {
8579                         fprintf(stderr, "Backrefs don't agree with each other "
8580                                 "and extent record doesn't agree with anybody,"
8581                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8582                                 rec->start, rec->nr);
8583                         ret = -EINVAL;
8584                         goto out;
8585                 } else if (!entry) {
8586                         /*
8587                          * Ok our backrefs were broken, we'll assume this is the
8588                          * correct value and add an entry for this range.
8589                          */
8590                         entry = malloc(sizeof(struct extent_entry));
8591                         if (!entry) {
8592                                 ret = -ENOMEM;
8593                                 goto out;
8594                         }
8595                         memset(entry, 0, sizeof(*entry));
8596                         entry->bytenr = rec->start;
8597                         entry->bytes = rec->nr;
8598                         list_add_tail(&entry->list, &entries);
8599                         nr_entries++;
8600                 }
8601                 entry->count++;
8602                 best = find_most_right_entry(&entries);
8603                 if (!best) {
8604                         fprintf(stderr, "Backrefs and extent record evenly "
8605                                 "split on who is right, this is going to "
8606                                 "require user input to fix bytenr %Lu bytes "
8607                                 "%Lu\n", rec->start, rec->nr);
8608                         ret = -EINVAL;
8609                         goto out;
8610                 }
8611         }
8612
8613         /*
8614          * I don't think this can happen currently as we'll abort() if we catch
8615          * this case higher up, but in case somebody removes that we still can't
8616          * deal with it properly here yet, so just bail out of that's the case.
8617          */
8618         if (best->bytenr != rec->start) {
8619                 fprintf(stderr, "Extent start and backref starts don't match, "
8620                         "please use btrfs-image on this file system and send "
8621                         "it to a btrfs developer so they can make fsck fix "
8622                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8623                         rec->start, rec->nr);
8624                 ret = -EINVAL;
8625                 goto out;
8626         }
8627
8628         /*
8629          * Ok great we all agreed on an extent record, let's go find the real
8630          * references and fix up the ones that don't match.
8631          */
8632         list_for_each_entry(back, &rec->backrefs, list) {
8633                 if (back->full_backref || !back->is_data)
8634                         continue;
8635
8636                 dback = to_data_backref(back);
8637
8638                 /*
8639                  * Still ignoring backrefs that don't have a real ref attached
8640                  * to them.
8641                  */
8642                 if (dback->found_ref == 0)
8643                         continue;
8644
8645                 if (dback->bytes == best->bytes &&
8646                     dback->disk_bytenr == best->bytenr)
8647                         continue;
8648
8649                 ret = repair_ref(info, path, dback, best);
8650                 if (ret)
8651                         goto out;
8652         }
8653
8654         /*
8655          * Ok we messed with the actual refs, which means we need to drop our
8656          * entire cache and go back and rescan.  I know this is a huge pain and
8657          * adds a lot of extra work, but it's the only way to be safe.  Once all
8658          * the backrefs agree we may not need to do anything to the extent
8659          * record itself.
8660          */
8661         ret = -EAGAIN;
8662 out:
8663         while (!list_empty(&entries)) {
8664                 entry = list_entry(entries.next, struct extent_entry, list);
8665                 list_del_init(&entry->list);
8666                 free(entry);
8667         }
8668         return ret;
8669 }
8670
8671 static int process_duplicates(struct cache_tree *extent_cache,
8672                               struct extent_record *rec)
8673 {
8674         struct extent_record *good, *tmp;
8675         struct cache_extent *cache;
8676         int ret;
8677
8678         /*
8679          * If we found a extent record for this extent then return, or if we
8680          * have more than one duplicate we are likely going to need to delete
8681          * something.
8682          */
8683         if (rec->found_rec || rec->num_duplicates > 1)
8684                 return 0;
8685
8686         /* Shouldn't happen but just in case */
8687         BUG_ON(!rec->num_duplicates);
8688
8689         /*
8690          * So this happens if we end up with a backref that doesn't match the
8691          * actual extent entry.  So either the backref is bad or the extent
8692          * entry is bad.  Either way we want to have the extent_record actually
8693          * reflect what we found in the extent_tree, so we need to take the
8694          * duplicate out and use that as the extent_record since the only way we
8695          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8696          */
8697         remove_cache_extent(extent_cache, &rec->cache);
8698
8699         good = to_extent_record(rec->dups.next);
8700         list_del_init(&good->list);
8701         INIT_LIST_HEAD(&good->backrefs);
8702         INIT_LIST_HEAD(&good->dups);
8703         good->cache.start = good->start;
8704         good->cache.size = good->nr;
8705         good->content_checked = 0;
8706         good->owner_ref_checked = 0;
8707         good->num_duplicates = 0;
8708         good->refs = rec->refs;
8709         list_splice_init(&rec->backrefs, &good->backrefs);
8710         while (1) {
8711                 cache = lookup_cache_extent(extent_cache, good->start,
8712                                             good->nr);
8713                 if (!cache)
8714                         break;
8715                 tmp = container_of(cache, struct extent_record, cache);
8716
8717                 /*
8718                  * If we find another overlapping extent and it's found_rec is
8719                  * set then it's a duplicate and we need to try and delete
8720                  * something.
8721                  */
8722                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8723                         if (list_empty(&good->list))
8724                                 list_add_tail(&good->list,
8725                                               &duplicate_extents);
8726                         good->num_duplicates += tmp->num_duplicates + 1;
8727                         list_splice_init(&tmp->dups, &good->dups);
8728                         list_del_init(&tmp->list);
8729                         list_add_tail(&tmp->list, &good->dups);
8730                         remove_cache_extent(extent_cache, &tmp->cache);
8731                         continue;
8732                 }
8733
8734                 /*
8735                  * Ok we have another non extent item backed extent rec, so lets
8736                  * just add it to this extent and carry on like we did above.
8737                  */
8738                 good->refs += tmp->refs;
8739                 list_splice_init(&tmp->backrefs, &good->backrefs);
8740                 remove_cache_extent(extent_cache, &tmp->cache);
8741                 free(tmp);
8742         }
8743         ret = insert_cache_extent(extent_cache, &good->cache);
8744         BUG_ON(ret);
8745         free(rec);
8746         return good->num_duplicates ? 0 : 1;
8747 }
8748
8749 static int delete_duplicate_records(struct btrfs_root *root,
8750                                     struct extent_record *rec)
8751 {
8752         struct btrfs_trans_handle *trans;
8753         LIST_HEAD(delete_list);
8754         struct btrfs_path path;
8755         struct extent_record *tmp, *good, *n;
8756         int nr_del = 0;
8757         int ret = 0, err;
8758         struct btrfs_key key;
8759
8760         btrfs_init_path(&path);
8761
8762         good = rec;
8763         /* Find the record that covers all of the duplicates. */
8764         list_for_each_entry(tmp, &rec->dups, list) {
8765                 if (good->start < tmp->start)
8766                         continue;
8767                 if (good->nr > tmp->nr)
8768                         continue;
8769
8770                 if (tmp->start + tmp->nr < good->start + good->nr) {
8771                         fprintf(stderr, "Ok we have overlapping extents that "
8772                                 "aren't completely covered by each other, this "
8773                                 "is going to require more careful thought.  "
8774                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8775                                 tmp->start, tmp->nr, good->start, good->nr);
8776                         abort();
8777                 }
8778                 good = tmp;
8779         }
8780
8781         if (good != rec)
8782                 list_add_tail(&rec->list, &delete_list);
8783
8784         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8785                 if (tmp == good)
8786                         continue;
8787                 list_move_tail(&tmp->list, &delete_list);
8788         }
8789
8790         root = root->fs_info->extent_root;
8791         trans = btrfs_start_transaction(root, 1);
8792         if (IS_ERR(trans)) {
8793                 ret = PTR_ERR(trans);
8794                 goto out;
8795         }
8796
8797         list_for_each_entry(tmp, &delete_list, list) {
8798                 if (tmp->found_rec == 0)
8799                         continue;
8800                 key.objectid = tmp->start;
8801                 key.type = BTRFS_EXTENT_ITEM_KEY;
8802                 key.offset = tmp->nr;
8803
8804                 /* Shouldn't happen but just in case */
8805                 if (tmp->metadata) {
8806                         fprintf(stderr, "Well this shouldn't happen, extent "
8807                                 "record overlaps but is metadata? "
8808                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8809                         abort();
8810                 }
8811
8812                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8813                 if (ret) {
8814                         if (ret > 0)
8815                                 ret = -EINVAL;
8816                         break;
8817                 }
8818                 ret = btrfs_del_item(trans, root, &path);
8819                 if (ret)
8820                         break;
8821                 btrfs_release_path(&path);
8822                 nr_del++;
8823         }
8824         err = btrfs_commit_transaction(trans, root);
8825         if (err && !ret)
8826                 ret = err;
8827 out:
8828         while (!list_empty(&delete_list)) {
8829                 tmp = to_extent_record(delete_list.next);
8830                 list_del_init(&tmp->list);
8831                 if (tmp == rec)
8832                         continue;
8833                 free(tmp);
8834         }
8835
8836         while (!list_empty(&rec->dups)) {
8837                 tmp = to_extent_record(rec->dups.next);
8838                 list_del_init(&tmp->list);
8839                 free(tmp);
8840         }
8841
8842         btrfs_release_path(&path);
8843
8844         if (!ret && !nr_del)
8845                 rec->num_duplicates = 0;
8846
8847         return ret ? ret : nr_del;
8848 }
8849
8850 static int find_possible_backrefs(struct btrfs_fs_info *info,
8851                                   struct btrfs_path *path,
8852                                   struct cache_tree *extent_cache,
8853                                   struct extent_record *rec)
8854 {
8855         struct btrfs_root *root;
8856         struct extent_backref *back;
8857         struct data_backref *dback;
8858         struct cache_extent *cache;
8859         struct btrfs_file_extent_item *fi;
8860         struct btrfs_key key;
8861         u64 bytenr, bytes;
8862         int ret;
8863
8864         list_for_each_entry(back, &rec->backrefs, list) {
8865                 /* Don't care about full backrefs (poor unloved backrefs) */
8866                 if (back->full_backref || !back->is_data)
8867                         continue;
8868
8869                 dback = to_data_backref(back);
8870
8871                 /* We found this one, we don't need to do a lookup */
8872                 if (dback->found_ref)
8873                         continue;
8874
8875                 key.objectid = dback->root;
8876                 key.type = BTRFS_ROOT_ITEM_KEY;
8877                 key.offset = (u64)-1;
8878
8879                 root = btrfs_read_fs_root(info, &key);
8880
8881                 /* No root, definitely a bad ref, skip */
8882                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8883                         continue;
8884                 /* Other err, exit */
8885                 if (IS_ERR(root))
8886                         return PTR_ERR(root);
8887
8888                 key.objectid = dback->owner;
8889                 key.type = BTRFS_EXTENT_DATA_KEY;
8890                 key.offset = dback->offset;
8891                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8892                 if (ret) {
8893                         btrfs_release_path(path);
8894                         if (ret < 0)
8895                                 return ret;
8896                         /* Didn't find it, we can carry on */
8897                         ret = 0;
8898                         continue;
8899                 }
8900
8901                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8902                                     struct btrfs_file_extent_item);
8903                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8904                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8905                 btrfs_release_path(path);
8906                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8907                 if (cache) {
8908                         struct extent_record *tmp;
8909                         tmp = container_of(cache, struct extent_record, cache);
8910
8911                         /*
8912                          * If we found an extent record for the bytenr for this
8913                          * particular backref then we can't add it to our
8914                          * current extent record.  We only want to add backrefs
8915                          * that don't have a corresponding extent item in the
8916                          * extent tree since they likely belong to this record
8917                          * and we need to fix it if it doesn't match bytenrs.
8918                          */
8919                         if  (tmp->found_rec)
8920                                 continue;
8921                 }
8922
8923                 dback->found_ref += 1;
8924                 dback->disk_bytenr = bytenr;
8925                 dback->bytes = bytes;
8926
8927                 /*
8928                  * Set this so the verify backref code knows not to trust the
8929                  * values in this backref.
8930                  */
8931                 back->broken = 1;
8932         }
8933
8934         return 0;
8935 }
8936
8937 /*
8938  * Record orphan data ref into corresponding root.
8939  *
8940  * Return 0 if the extent item contains data ref and recorded.
8941  * Return 1 if the extent item contains no useful data ref
8942  *   On that case, it may contains only shared_dataref or metadata backref
8943  *   or the file extent exists(this should be handled by the extent bytenr
8944  *   recovery routine)
8945  * Return <0 if something goes wrong.
8946  */
8947 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8948                                       struct extent_record *rec)
8949 {
8950         struct btrfs_key key;
8951         struct btrfs_root *dest_root;
8952         struct extent_backref *back;
8953         struct data_backref *dback;
8954         struct orphan_data_extent *orphan;
8955         struct btrfs_path path;
8956         int recorded_data_ref = 0;
8957         int ret = 0;
8958
8959         if (rec->metadata)
8960                 return 1;
8961         btrfs_init_path(&path);
8962         list_for_each_entry(back, &rec->backrefs, list) {
8963                 if (back->full_backref || !back->is_data ||
8964                     !back->found_extent_tree)
8965                         continue;
8966                 dback = to_data_backref(back);
8967                 if (dback->found_ref)
8968                         continue;
8969                 key.objectid = dback->root;
8970                 key.type = BTRFS_ROOT_ITEM_KEY;
8971                 key.offset = (u64)-1;
8972
8973                 dest_root = btrfs_read_fs_root(fs_info, &key);
8974
8975                 /* For non-exist root we just skip it */
8976                 if (IS_ERR(dest_root) || !dest_root)
8977                         continue;
8978
8979                 key.objectid = dback->owner;
8980                 key.type = BTRFS_EXTENT_DATA_KEY;
8981                 key.offset = dback->offset;
8982
8983                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8984                 btrfs_release_path(&path);
8985                 /*
8986                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8987                  * we need to record it for inode/file extent rebuild.
8988                  * For ret > 0, we record it only for file extent rebuild.
8989                  * For ret == 0, the file extent exists but only bytenr
8990                  * mismatch, let the original bytenr fix routine to handle,
8991                  * don't record it.
8992                  */
8993                 if (ret == 0)
8994                         continue;
8995                 ret = 0;
8996                 orphan = malloc(sizeof(*orphan));
8997                 if (!orphan) {
8998                         ret = -ENOMEM;
8999                         goto out;
9000                 }
9001                 INIT_LIST_HEAD(&orphan->list);
9002                 orphan->root = dback->root;
9003                 orphan->objectid = dback->owner;
9004                 orphan->offset = dback->offset;
9005                 orphan->disk_bytenr = rec->cache.start;
9006                 orphan->disk_len = rec->cache.size;
9007                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9008                 recorded_data_ref = 1;
9009         }
9010 out:
9011         btrfs_release_path(&path);
9012         if (!ret)
9013                 return !recorded_data_ref;
9014         else
9015                 return ret;
9016 }
9017
9018 /*
9019  * when an incorrect extent item is found, this will delete
9020  * all of the existing entries for it and recreate them
9021  * based on what the tree scan found.
9022  */
9023 static int fixup_extent_refs(struct btrfs_fs_info *info,
9024                              struct cache_tree *extent_cache,
9025                              struct extent_record *rec)
9026 {
9027         struct btrfs_trans_handle *trans = NULL;
9028         int ret;
9029         struct btrfs_path path;
9030         struct list_head *cur = rec->backrefs.next;
9031         struct cache_extent *cache;
9032         struct extent_backref *back;
9033         int allocated = 0;
9034         u64 flags = 0;
9035
9036         if (rec->flag_block_full_backref)
9037                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9038
9039         btrfs_init_path(&path);
9040         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9041                 /*
9042                  * Sometimes the backrefs themselves are so broken they don't
9043                  * get attached to any meaningful rec, so first go back and
9044                  * check any of our backrefs that we couldn't find and throw
9045                  * them into the list if we find the backref so that
9046                  * verify_backrefs can figure out what to do.
9047                  */
9048                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9049                 if (ret < 0)
9050                         goto out;
9051         }
9052
9053         /* step one, make sure all of the backrefs agree */
9054         ret = verify_backrefs(info, &path, rec);
9055         if (ret < 0)
9056                 goto out;
9057
9058         trans = btrfs_start_transaction(info->extent_root, 1);
9059         if (IS_ERR(trans)) {
9060                 ret = PTR_ERR(trans);
9061                 goto out;
9062         }
9063
9064         /* step two, delete all the existing records */
9065         ret = delete_extent_records(trans, info->extent_root, &path,
9066                                     rec->start);
9067
9068         if (ret < 0)
9069                 goto out;
9070
9071         /* was this block corrupt?  If so, don't add references to it */
9072         cache = lookup_cache_extent(info->corrupt_blocks,
9073                                     rec->start, rec->max_size);
9074         if (cache) {
9075                 ret = 0;
9076                 goto out;
9077         }
9078
9079         /* step three, recreate all the refs we did find */
9080         while(cur != &rec->backrefs) {
9081                 back = to_extent_backref(cur);
9082                 cur = cur->next;
9083
9084                 /*
9085                  * if we didn't find any references, don't create a
9086                  * new extent record
9087                  */
9088                 if (!back->found_ref)
9089                         continue;
9090
9091                 rec->bad_full_backref = 0;
9092                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9093                 allocated = 1;
9094
9095                 if (ret)
9096                         goto out;
9097         }
9098 out:
9099         if (trans) {
9100                 int err = btrfs_commit_transaction(trans, info->extent_root);
9101                 if (!ret)
9102                         ret = err;
9103         }
9104
9105         if (!ret)
9106                 fprintf(stderr, "Repaired extent references for %llu\n",
9107                                 (unsigned long long)rec->start);
9108
9109         btrfs_release_path(&path);
9110         return ret;
9111 }
9112
9113 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9114                               struct extent_record *rec)
9115 {
9116         struct btrfs_trans_handle *trans;
9117         struct btrfs_root *root = fs_info->extent_root;
9118         struct btrfs_path path;
9119         struct btrfs_extent_item *ei;
9120         struct btrfs_key key;
9121         u64 flags;
9122         int ret = 0;
9123
9124         key.objectid = rec->start;
9125         if (rec->metadata) {
9126                 key.type = BTRFS_METADATA_ITEM_KEY;
9127                 key.offset = rec->info_level;
9128         } else {
9129                 key.type = BTRFS_EXTENT_ITEM_KEY;
9130                 key.offset = rec->max_size;
9131         }
9132
9133         trans = btrfs_start_transaction(root, 0);
9134         if (IS_ERR(trans))
9135                 return PTR_ERR(trans);
9136
9137         btrfs_init_path(&path);
9138         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9139         if (ret < 0) {
9140                 btrfs_release_path(&path);
9141                 btrfs_commit_transaction(trans, root);
9142                 return ret;
9143         } else if (ret) {
9144                 fprintf(stderr, "Didn't find extent for %llu\n",
9145                         (unsigned long long)rec->start);
9146                 btrfs_release_path(&path);
9147                 btrfs_commit_transaction(trans, root);
9148                 return -ENOENT;
9149         }
9150
9151         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9152                             struct btrfs_extent_item);
9153         flags = btrfs_extent_flags(path.nodes[0], ei);
9154         if (rec->flag_block_full_backref) {
9155                 fprintf(stderr, "setting full backref on %llu\n",
9156                         (unsigned long long)key.objectid);
9157                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9158         } else {
9159                 fprintf(stderr, "clearing full backref on %llu\n",
9160                         (unsigned long long)key.objectid);
9161                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9162         }
9163         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9164         btrfs_mark_buffer_dirty(path.nodes[0]);
9165         btrfs_release_path(&path);
9166         ret = btrfs_commit_transaction(trans, root);
9167         if (!ret)
9168                 fprintf(stderr, "Repaired extent flags for %llu\n",
9169                                 (unsigned long long)rec->start);
9170
9171         return ret;
9172 }
9173
9174 /* right now we only prune from the extent allocation tree */
9175 static int prune_one_block(struct btrfs_trans_handle *trans,
9176                            struct btrfs_fs_info *info,
9177                            struct btrfs_corrupt_block *corrupt)
9178 {
9179         int ret;
9180         struct btrfs_path path;
9181         struct extent_buffer *eb;
9182         u64 found;
9183         int slot;
9184         int nritems;
9185         int level = corrupt->level + 1;
9186
9187         btrfs_init_path(&path);
9188 again:
9189         /* we want to stop at the parent to our busted block */
9190         path.lowest_level = level;
9191
9192         ret = btrfs_search_slot(trans, info->extent_root,
9193                                 &corrupt->key, &path, -1, 1);
9194
9195         if (ret < 0)
9196                 goto out;
9197
9198         eb = path.nodes[level];
9199         if (!eb) {
9200                 ret = -ENOENT;
9201                 goto out;
9202         }
9203
9204         /*
9205          * hopefully the search gave us the block we want to prune,
9206          * lets try that first
9207          */
9208         slot = path.slots[level];
9209         found =  btrfs_node_blockptr(eb, slot);
9210         if (found == corrupt->cache.start)
9211                 goto del_ptr;
9212
9213         nritems = btrfs_header_nritems(eb);
9214
9215         /* the search failed, lets scan this node and hope we find it */
9216         for (slot = 0; slot < nritems; slot++) {
9217                 found =  btrfs_node_blockptr(eb, slot);
9218                 if (found == corrupt->cache.start)
9219                         goto del_ptr;
9220         }
9221         /*
9222          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9223          * to this block
9224          */
9225         if (eb == info->extent_root->node) {
9226                 ret = -ENOENT;
9227                 goto out;
9228         } else {
9229                 level++;
9230                 btrfs_release_path(&path);
9231                 goto again;
9232         }
9233
9234 del_ptr:
9235         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9236         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9237
9238 out:
9239         btrfs_release_path(&path);
9240         return ret;
9241 }
9242
9243 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9244 {
9245         struct btrfs_trans_handle *trans = NULL;
9246         struct cache_extent *cache;
9247         struct btrfs_corrupt_block *corrupt;
9248
9249         while (1) {
9250                 cache = search_cache_extent(info->corrupt_blocks, 0);
9251                 if (!cache)
9252                         break;
9253                 if (!trans) {
9254                         trans = btrfs_start_transaction(info->extent_root, 1);
9255                         if (IS_ERR(trans))
9256                                 return PTR_ERR(trans);
9257                 }
9258                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9259                 prune_one_block(trans, info, corrupt);
9260                 remove_cache_extent(info->corrupt_blocks, cache);
9261         }
9262         if (trans)
9263                 return btrfs_commit_transaction(trans, info->extent_root);
9264         return 0;
9265 }
9266
9267 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9268 {
9269         struct btrfs_block_group_cache *cache;
9270         u64 start, end;
9271         int ret;
9272
9273         while (1) {
9274                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9275                                             &start, &end, EXTENT_DIRTY);
9276                 if (ret)
9277                         break;
9278                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9279         }
9280
9281         start = 0;
9282         while (1) {
9283                 cache = btrfs_lookup_first_block_group(fs_info, start);
9284                 if (!cache)
9285                         break;
9286                 if (cache->cached)
9287                         cache->cached = 0;
9288                 start = cache->key.objectid + cache->key.offset;
9289         }
9290 }
9291
9292 static int check_extent_refs(struct btrfs_root *root,
9293                              struct cache_tree *extent_cache)
9294 {
9295         struct extent_record *rec;
9296         struct cache_extent *cache;
9297         int ret = 0;
9298         int had_dups = 0;
9299
9300         if (repair) {
9301                 /*
9302                  * if we're doing a repair, we have to make sure
9303                  * we don't allocate from the problem extents.
9304                  * In the worst case, this will be all the
9305                  * extents in the FS
9306                  */
9307                 cache = search_cache_extent(extent_cache, 0);
9308                 while(cache) {
9309                         rec = container_of(cache, struct extent_record, cache);
9310                         set_extent_dirty(root->fs_info->excluded_extents,
9311                                          rec->start,
9312                                          rec->start + rec->max_size - 1);
9313                         cache = next_cache_extent(cache);
9314                 }
9315
9316                 /* pin down all the corrupted blocks too */
9317                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9318                 while(cache) {
9319                         set_extent_dirty(root->fs_info->excluded_extents,
9320                                          cache->start,
9321                                          cache->start + cache->size - 1);
9322                         cache = next_cache_extent(cache);
9323                 }
9324                 prune_corrupt_blocks(root->fs_info);
9325                 reset_cached_block_groups(root->fs_info);
9326         }
9327
9328         reset_cached_block_groups(root->fs_info);
9329
9330         /*
9331          * We need to delete any duplicate entries we find first otherwise we
9332          * could mess up the extent tree when we have backrefs that actually
9333          * belong to a different extent item and not the weird duplicate one.
9334          */
9335         while (repair && !list_empty(&duplicate_extents)) {
9336                 rec = to_extent_record(duplicate_extents.next);
9337                 list_del_init(&rec->list);
9338
9339                 /* Sometimes we can find a backref before we find an actual
9340                  * extent, so we need to process it a little bit to see if there
9341                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9342                  * if this is a backref screwup.  If we need to delete stuff
9343                  * process_duplicates() will return 0, otherwise it will return
9344                  * 1 and we
9345                  */
9346                 if (process_duplicates(extent_cache, rec))
9347                         continue;
9348                 ret = delete_duplicate_records(root, rec);
9349                 if (ret < 0)
9350                         return ret;
9351                 /*
9352                  * delete_duplicate_records will return the number of entries
9353                  * deleted, so if it's greater than 0 then we know we actually
9354                  * did something and we need to remove.
9355                  */
9356                 if (ret)
9357                         had_dups = 1;
9358         }
9359
9360         if (had_dups)
9361                 return -EAGAIN;
9362
9363         while(1) {
9364                 int cur_err = 0;
9365                 int fix = 0;
9366
9367                 cache = search_cache_extent(extent_cache, 0);
9368                 if (!cache)
9369                         break;
9370                 rec = container_of(cache, struct extent_record, cache);
9371                 if (rec->num_duplicates) {
9372                         fprintf(stderr, "extent item %llu has multiple extent "
9373                                 "items\n", (unsigned long long)rec->start);
9374                         cur_err = 1;
9375                 }
9376
9377                 if (rec->refs != rec->extent_item_refs) {
9378                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9379                                 (unsigned long long)rec->start,
9380                                 (unsigned long long)rec->nr);
9381                         fprintf(stderr, "extent item %llu, found %llu\n",
9382                                 (unsigned long long)rec->extent_item_refs,
9383                                 (unsigned long long)rec->refs);
9384                         ret = record_orphan_data_extents(root->fs_info, rec);
9385                         if (ret < 0)
9386                                 goto repair_abort;
9387                         fix = ret;
9388                         cur_err = 1;
9389                 }
9390                 if (all_backpointers_checked(rec, 1)) {
9391                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9392                                 (unsigned long long)rec->start,
9393                                 (unsigned long long)rec->nr);
9394                         fix = 1;
9395                         cur_err = 1;
9396                 }
9397                 if (!rec->owner_ref_checked) {
9398                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9399                                 (unsigned long long)rec->start,
9400                                 (unsigned long long)rec->nr);
9401                         fix = 1;
9402                         cur_err = 1;
9403                 }
9404
9405                 if (repair && fix) {
9406                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9407                         if (ret)
9408                                 goto repair_abort;
9409                 }
9410
9411
9412                 if (rec->bad_full_backref) {
9413                         fprintf(stderr, "bad full backref, on [%llu]\n",
9414                                 (unsigned long long)rec->start);
9415                         if (repair) {
9416                                 ret = fixup_extent_flags(root->fs_info, rec);
9417                                 if (ret)
9418                                         goto repair_abort;
9419                                 fix = 1;
9420                         }
9421                         cur_err = 1;
9422                 }
9423                 /*
9424                  * Although it's not a extent ref's problem, we reuse this
9425                  * routine for error reporting.
9426                  * No repair function yet.
9427                  */
9428                 if (rec->crossing_stripes) {
9429                         fprintf(stderr,
9430                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9431                                 rec->start, rec->start + rec->max_size);
9432                         cur_err = 1;
9433                 }
9434
9435                 if (rec->wrong_chunk_type) {
9436                         fprintf(stderr,
9437                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9438                                 rec->start, rec->start + rec->max_size);
9439                         cur_err = 1;
9440                 }
9441
9442                 remove_cache_extent(extent_cache, cache);
9443                 free_all_extent_backrefs(rec);
9444                 if (!init_extent_tree && repair && (!cur_err || fix))
9445                         clear_extent_dirty(root->fs_info->excluded_extents,
9446                                            rec->start,
9447                                            rec->start + rec->max_size - 1);
9448                 free(rec);
9449         }
9450 repair_abort:
9451         if (repair) {
9452                 if (ret && ret != -EAGAIN) {
9453                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9454                         exit(1);
9455                 } else if (!ret) {
9456                         struct btrfs_trans_handle *trans;
9457
9458                         root = root->fs_info->extent_root;
9459                         trans = btrfs_start_transaction(root, 1);
9460                         if (IS_ERR(trans)) {
9461                                 ret = PTR_ERR(trans);
9462                                 goto repair_abort;
9463                         }
9464
9465                         ret = btrfs_fix_block_accounting(trans, root);
9466                         if (ret)
9467                                 goto repair_abort;
9468                         ret = btrfs_commit_transaction(trans, root);
9469                         if (ret)
9470                                 goto repair_abort;
9471                 }
9472                 return ret;
9473         }
9474         return 0;
9475 }
9476
9477 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9478 {
9479         u64 stripe_size;
9480
9481         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9482                 stripe_size = length;
9483                 stripe_size /= num_stripes;
9484         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9485                 stripe_size = length * 2;
9486                 stripe_size /= num_stripes;
9487         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9488                 stripe_size = length;
9489                 stripe_size /= (num_stripes - 1);
9490         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9491                 stripe_size = length;
9492                 stripe_size /= (num_stripes - 2);
9493         } else {
9494                 stripe_size = length;
9495         }
9496         return stripe_size;
9497 }
9498
9499 /*
9500  * Check the chunk with its block group/dev list ref:
9501  * Return 0 if all refs seems valid.
9502  * Return 1 if part of refs seems valid, need later check for rebuild ref
9503  * like missing block group and needs to search extent tree to rebuild them.
9504  * Return -1 if essential refs are missing and unable to rebuild.
9505  */
9506 static int check_chunk_refs(struct chunk_record *chunk_rec,
9507                             struct block_group_tree *block_group_cache,
9508                             struct device_extent_tree *dev_extent_cache,
9509                             int silent)
9510 {
9511         struct cache_extent *block_group_item;
9512         struct block_group_record *block_group_rec;
9513         struct cache_extent *dev_extent_item;
9514         struct device_extent_record *dev_extent_rec;
9515         u64 devid;
9516         u64 offset;
9517         u64 length;
9518         int metadump_v2 = 0;
9519         int i;
9520         int ret = 0;
9521
9522         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9523                                                chunk_rec->offset,
9524                                                chunk_rec->length);
9525         if (block_group_item) {
9526                 block_group_rec = container_of(block_group_item,
9527                                                struct block_group_record,
9528                                                cache);
9529                 if (chunk_rec->length != block_group_rec->offset ||
9530                     chunk_rec->offset != block_group_rec->objectid ||
9531                     (!metadump_v2 &&
9532                      chunk_rec->type_flags != block_group_rec->flags)) {
9533                         if (!silent)
9534                                 fprintf(stderr,
9535                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9536                                         chunk_rec->objectid,
9537                                         chunk_rec->type,
9538                                         chunk_rec->offset,
9539                                         chunk_rec->length,
9540                                         chunk_rec->offset,
9541                                         chunk_rec->type_flags,
9542                                         block_group_rec->objectid,
9543                                         block_group_rec->type,
9544                                         block_group_rec->offset,
9545                                         block_group_rec->offset,
9546                                         block_group_rec->objectid,
9547                                         block_group_rec->flags);
9548                         ret = -1;
9549                 } else {
9550                         list_del_init(&block_group_rec->list);
9551                         chunk_rec->bg_rec = block_group_rec;
9552                 }
9553         } else {
9554                 if (!silent)
9555                         fprintf(stderr,
9556                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9557                                 chunk_rec->objectid,
9558                                 chunk_rec->type,
9559                                 chunk_rec->offset,
9560                                 chunk_rec->length,
9561                                 chunk_rec->offset,
9562                                 chunk_rec->type_flags);
9563                 ret = 1;
9564         }
9565
9566         if (metadump_v2)
9567                 return ret;
9568
9569         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9570                                     chunk_rec->num_stripes);
9571         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9572                 devid = chunk_rec->stripes[i].devid;
9573                 offset = chunk_rec->stripes[i].offset;
9574                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9575                                                        devid, offset, length);
9576                 if (dev_extent_item) {
9577                         dev_extent_rec = container_of(dev_extent_item,
9578                                                 struct device_extent_record,
9579                                                 cache);
9580                         if (dev_extent_rec->objectid != devid ||
9581                             dev_extent_rec->offset != offset ||
9582                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9583                             dev_extent_rec->length != length) {
9584                                 if (!silent)
9585                                         fprintf(stderr,
9586                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9587                                                 chunk_rec->objectid,
9588                                                 chunk_rec->type,
9589                                                 chunk_rec->offset,
9590                                                 chunk_rec->stripes[i].devid,
9591                                                 chunk_rec->stripes[i].offset,
9592                                                 dev_extent_rec->objectid,
9593                                                 dev_extent_rec->offset,
9594                                                 dev_extent_rec->length);
9595                                 ret = -1;
9596                         } else {
9597                                 list_move(&dev_extent_rec->chunk_list,
9598                                           &chunk_rec->dextents);
9599                         }
9600                 } else {
9601                         if (!silent)
9602                                 fprintf(stderr,
9603                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9604                                         chunk_rec->objectid,
9605                                         chunk_rec->type,
9606                                         chunk_rec->offset,
9607                                         chunk_rec->stripes[i].devid,
9608                                         chunk_rec->stripes[i].offset);
9609                         ret = -1;
9610                 }
9611         }
9612         return ret;
9613 }
9614
9615 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9616 int check_chunks(struct cache_tree *chunk_cache,
9617                  struct block_group_tree *block_group_cache,
9618                  struct device_extent_tree *dev_extent_cache,
9619                  struct list_head *good, struct list_head *bad,
9620                  struct list_head *rebuild, int silent)
9621 {
9622         struct cache_extent *chunk_item;
9623         struct chunk_record *chunk_rec;
9624         struct block_group_record *bg_rec;
9625         struct device_extent_record *dext_rec;
9626         int err;
9627         int ret = 0;
9628
9629         chunk_item = first_cache_extent(chunk_cache);
9630         while (chunk_item) {
9631                 chunk_rec = container_of(chunk_item, struct chunk_record,
9632                                          cache);
9633                 err = check_chunk_refs(chunk_rec, block_group_cache,
9634                                        dev_extent_cache, silent);
9635                 if (err < 0)
9636                         ret = err;
9637                 if (err == 0 && good)
9638                         list_add_tail(&chunk_rec->list, good);
9639                 if (err > 0 && rebuild)
9640                         list_add_tail(&chunk_rec->list, rebuild);
9641                 if (err < 0 && bad)
9642                         list_add_tail(&chunk_rec->list, bad);
9643                 chunk_item = next_cache_extent(chunk_item);
9644         }
9645
9646         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9647                 if (!silent)
9648                         fprintf(stderr,
9649                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9650                                 bg_rec->objectid,
9651                                 bg_rec->offset,
9652                                 bg_rec->flags);
9653                 if (!ret)
9654                         ret = 1;
9655         }
9656
9657         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9658                             chunk_list) {
9659                 if (!silent)
9660                         fprintf(stderr,
9661                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9662                                 dext_rec->objectid,
9663                                 dext_rec->offset,
9664                                 dext_rec->length);
9665                 if (!ret)
9666                         ret = 1;
9667         }
9668         return ret;
9669 }
9670
9671
9672 static int check_device_used(struct device_record *dev_rec,
9673                              struct device_extent_tree *dext_cache)
9674 {
9675         struct cache_extent *cache;
9676         struct device_extent_record *dev_extent_rec;
9677         u64 total_byte = 0;
9678
9679         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9680         while (cache) {
9681                 dev_extent_rec = container_of(cache,
9682                                               struct device_extent_record,
9683                                               cache);
9684                 if (dev_extent_rec->objectid != dev_rec->devid)
9685                         break;
9686
9687                 list_del_init(&dev_extent_rec->device_list);
9688                 total_byte += dev_extent_rec->length;
9689                 cache = next_cache_extent(cache);
9690         }
9691
9692         if (total_byte != dev_rec->byte_used) {
9693                 fprintf(stderr,
9694                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9695                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9696                         dev_rec->type, dev_rec->offset);
9697                 return -1;
9698         } else {
9699                 return 0;
9700         }
9701 }
9702
9703 /* check btrfs_dev_item -> btrfs_dev_extent */
9704 static int check_devices(struct rb_root *dev_cache,
9705                          struct device_extent_tree *dev_extent_cache)
9706 {
9707         struct rb_node *dev_node;
9708         struct device_record *dev_rec;
9709         struct device_extent_record *dext_rec;
9710         int err;
9711         int ret = 0;
9712
9713         dev_node = rb_first(dev_cache);
9714         while (dev_node) {
9715                 dev_rec = container_of(dev_node, struct device_record, node);
9716                 err = check_device_used(dev_rec, dev_extent_cache);
9717                 if (err)
9718                         ret = err;
9719
9720                 dev_node = rb_next(dev_node);
9721         }
9722         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9723                             device_list) {
9724                 fprintf(stderr,
9725                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9726                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9727                 if (!ret)
9728                         ret = 1;
9729         }
9730         return ret;
9731 }
9732
9733 static int add_root_item_to_list(struct list_head *head,
9734                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9735                                   u8 level, u8 drop_level,
9736                                   struct btrfs_key *drop_key)
9737 {
9738
9739         struct root_item_record *ri_rec;
9740         ri_rec = malloc(sizeof(*ri_rec));
9741         if (!ri_rec)
9742                 return -ENOMEM;
9743         ri_rec->bytenr = bytenr;
9744         ri_rec->objectid = objectid;
9745         ri_rec->level = level;
9746         ri_rec->drop_level = drop_level;
9747         ri_rec->last_snapshot = last_snapshot;
9748         if (drop_key)
9749                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9750         list_add_tail(&ri_rec->list, head);
9751
9752         return 0;
9753 }
9754
9755 static void free_root_item_list(struct list_head *list)
9756 {
9757         struct root_item_record *ri_rec;
9758
9759         while (!list_empty(list)) {
9760                 ri_rec = list_first_entry(list, struct root_item_record,
9761                                           list);
9762                 list_del_init(&ri_rec->list);
9763                 free(ri_rec);
9764         }
9765 }
9766
9767 static int deal_root_from_list(struct list_head *list,
9768                                struct btrfs_root *root,
9769                                struct block_info *bits,
9770                                int bits_nr,
9771                                struct cache_tree *pending,
9772                                struct cache_tree *seen,
9773                                struct cache_tree *reada,
9774                                struct cache_tree *nodes,
9775                                struct cache_tree *extent_cache,
9776                                struct cache_tree *chunk_cache,
9777                                struct rb_root *dev_cache,
9778                                struct block_group_tree *block_group_cache,
9779                                struct device_extent_tree *dev_extent_cache)
9780 {
9781         int ret = 0;
9782         u64 last;
9783
9784         while (!list_empty(list)) {
9785                 struct root_item_record *rec;
9786                 struct extent_buffer *buf;
9787                 rec = list_entry(list->next,
9788                                  struct root_item_record, list);
9789                 last = 0;
9790                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9791                 if (!extent_buffer_uptodate(buf)) {
9792                         free_extent_buffer(buf);
9793                         ret = -EIO;
9794                         break;
9795                 }
9796                 ret = add_root_to_pending(buf, extent_cache, pending,
9797                                     seen, nodes, rec->objectid);
9798                 if (ret < 0)
9799                         break;
9800                 /*
9801                  * To rebuild extent tree, we need deal with snapshot
9802                  * one by one, otherwise we deal with node firstly which
9803                  * can maximize readahead.
9804                  */
9805                 while (1) {
9806                         ret = run_next_block(root, bits, bits_nr, &last,
9807                                              pending, seen, reada, nodes,
9808                                              extent_cache, chunk_cache,
9809                                              dev_cache, block_group_cache,
9810                                              dev_extent_cache, rec);
9811                         if (ret != 0)
9812                                 break;
9813                 }
9814                 free_extent_buffer(buf);
9815                 list_del(&rec->list);
9816                 free(rec);
9817                 if (ret < 0)
9818                         break;
9819         }
9820         while (ret >= 0) {
9821                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9822                                      reada, nodes, extent_cache, chunk_cache,
9823                                      dev_cache, block_group_cache,
9824                                      dev_extent_cache, NULL);
9825                 if (ret != 0) {
9826                         if (ret > 0)
9827                                 ret = 0;
9828                         break;
9829                 }
9830         }
9831         return ret;
9832 }
9833
9834 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
9835 {
9836         struct rb_root dev_cache;
9837         struct cache_tree chunk_cache;
9838         struct block_group_tree block_group_cache;
9839         struct device_extent_tree dev_extent_cache;
9840         struct cache_tree extent_cache;
9841         struct cache_tree seen;
9842         struct cache_tree pending;
9843         struct cache_tree reada;
9844         struct cache_tree nodes;
9845         struct extent_io_tree excluded_extents;
9846         struct cache_tree corrupt_blocks;
9847         struct btrfs_path path;
9848         struct btrfs_key key;
9849         struct btrfs_key found_key;
9850         int ret, err = 0;
9851         struct block_info *bits;
9852         int bits_nr;
9853         struct extent_buffer *leaf;
9854         int slot;
9855         struct btrfs_root_item ri;
9856         struct list_head dropping_trees;
9857         struct list_head normal_trees;
9858         struct btrfs_root *root1;
9859         struct btrfs_root *root;
9860         u64 objectid;
9861         u8 level;
9862
9863         root = fs_info->fs_root;
9864         dev_cache = RB_ROOT;
9865         cache_tree_init(&chunk_cache);
9866         block_group_tree_init(&block_group_cache);
9867         device_extent_tree_init(&dev_extent_cache);
9868
9869         cache_tree_init(&extent_cache);
9870         cache_tree_init(&seen);
9871         cache_tree_init(&pending);
9872         cache_tree_init(&nodes);
9873         cache_tree_init(&reada);
9874         cache_tree_init(&corrupt_blocks);
9875         extent_io_tree_init(&excluded_extents);
9876         INIT_LIST_HEAD(&dropping_trees);
9877         INIT_LIST_HEAD(&normal_trees);
9878
9879         if (repair) {
9880                 fs_info->excluded_extents = &excluded_extents;
9881                 fs_info->fsck_extent_cache = &extent_cache;
9882                 fs_info->free_extent_hook = free_extent_hook;
9883                 fs_info->corrupt_blocks = &corrupt_blocks;
9884         }
9885
9886         bits_nr = 1024;
9887         bits = malloc(bits_nr * sizeof(struct block_info));
9888         if (!bits) {
9889                 perror("malloc");
9890                 exit(1);
9891         }
9892
9893         if (ctx.progress_enabled) {
9894                 ctx.tp = TASK_EXTENTS;
9895                 task_start(ctx.info);
9896         }
9897
9898 again:
9899         root1 = fs_info->tree_root;
9900         level = btrfs_header_level(root1->node);
9901         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9902                                     root1->node->start, 0, level, 0, NULL);
9903         if (ret < 0)
9904                 goto out;
9905         root1 = fs_info->chunk_root;
9906         level = btrfs_header_level(root1->node);
9907         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9908                                     root1->node->start, 0, level, 0, NULL);
9909         if (ret < 0)
9910                 goto out;
9911         btrfs_init_path(&path);
9912         key.offset = 0;
9913         key.objectid = 0;
9914         key.type = BTRFS_ROOT_ITEM_KEY;
9915         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
9916         if (ret < 0)
9917                 goto out;
9918         while(1) {
9919                 leaf = path.nodes[0];
9920                 slot = path.slots[0];
9921                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9922                         ret = btrfs_next_leaf(root, &path);
9923                         if (ret != 0)
9924                                 break;
9925                         leaf = path.nodes[0];
9926                         slot = path.slots[0];
9927                 }
9928                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9929                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9930                         unsigned long offset;
9931                         u64 last_snapshot;
9932
9933                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9934                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9935                         last_snapshot = btrfs_root_last_snapshot(&ri);
9936                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9937                                 level = btrfs_root_level(&ri);
9938                                 ret = add_root_item_to_list(&normal_trees,
9939                                                 found_key.objectid,
9940                                                 btrfs_root_bytenr(&ri),
9941                                                 last_snapshot, level,
9942                                                 0, NULL);
9943                                 if (ret < 0)
9944                                         goto out;
9945                         } else {
9946                                 level = btrfs_root_level(&ri);
9947                                 objectid = found_key.objectid;
9948                                 btrfs_disk_key_to_cpu(&found_key,
9949                                                       &ri.drop_progress);
9950                                 ret = add_root_item_to_list(&dropping_trees,
9951                                                 objectid,
9952                                                 btrfs_root_bytenr(&ri),
9953                                                 last_snapshot, level,
9954                                                 ri.drop_level, &found_key);
9955                                 if (ret < 0)
9956                                         goto out;
9957                         }
9958                 }
9959                 path.slots[0]++;
9960         }
9961         btrfs_release_path(&path);
9962
9963         /*
9964          * check_block can return -EAGAIN if it fixes something, please keep
9965          * this in mind when dealing with return values from these functions, if
9966          * we get -EAGAIN we want to fall through and restart the loop.
9967          */
9968         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9969                                   &seen, &reada, &nodes, &extent_cache,
9970                                   &chunk_cache, &dev_cache, &block_group_cache,
9971                                   &dev_extent_cache);
9972         if (ret < 0) {
9973                 if (ret == -EAGAIN)
9974                         goto loop;
9975                 goto out;
9976         }
9977         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9978                                   &pending, &seen, &reada, &nodes,
9979                                   &extent_cache, &chunk_cache, &dev_cache,
9980                                   &block_group_cache, &dev_extent_cache);
9981         if (ret < 0) {
9982                 if (ret == -EAGAIN)
9983                         goto loop;
9984                 goto out;
9985         }
9986
9987         ret = check_chunks(&chunk_cache, &block_group_cache,
9988                            &dev_extent_cache, NULL, NULL, NULL, 0);
9989         if (ret) {
9990                 if (ret == -EAGAIN)
9991                         goto loop;
9992                 err = ret;
9993         }
9994
9995         ret = check_extent_refs(root, &extent_cache);
9996         if (ret < 0) {
9997                 if (ret == -EAGAIN)
9998                         goto loop;
9999                 goto out;
10000         }
10001
10002         ret = check_devices(&dev_cache, &dev_extent_cache);
10003         if (ret && err)
10004                 ret = err;
10005
10006 out:
10007         task_stop(ctx.info);
10008         if (repair) {
10009                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10010                 extent_io_tree_cleanup(&excluded_extents);
10011                 fs_info->fsck_extent_cache = NULL;
10012                 fs_info->free_extent_hook = NULL;
10013                 fs_info->corrupt_blocks = NULL;
10014                 fs_info->excluded_extents = NULL;
10015         }
10016         free(bits);
10017         free_chunk_cache_tree(&chunk_cache);
10018         free_device_cache_tree(&dev_cache);
10019         free_block_group_tree(&block_group_cache);
10020         free_device_extent_tree(&dev_extent_cache);
10021         free_extent_cache_tree(&seen);
10022         free_extent_cache_tree(&pending);
10023         free_extent_cache_tree(&reada);
10024         free_extent_cache_tree(&nodes);
10025         free_root_item_list(&normal_trees);
10026         free_root_item_list(&dropping_trees);
10027         return ret;
10028 loop:
10029         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10030         free_extent_cache_tree(&seen);
10031         free_extent_cache_tree(&pending);
10032         free_extent_cache_tree(&reada);
10033         free_extent_cache_tree(&nodes);
10034         free_chunk_cache_tree(&chunk_cache);
10035         free_block_group_tree(&block_group_cache);
10036         free_device_cache_tree(&dev_cache);
10037         free_device_extent_tree(&dev_extent_cache);
10038         free_extent_record_cache(&extent_cache);
10039         free_root_item_list(&normal_trees);
10040         free_root_item_list(&dropping_trees);
10041         extent_io_tree_cleanup(&excluded_extents);
10042         goto again;
10043 }
10044
10045 /*
10046  * Check backrefs of a tree block given by @bytenr or @eb.
10047  *
10048  * @root:       the root containing the @bytenr or @eb
10049  * @eb:         tree block extent buffer, can be NULL
10050  * @bytenr:     bytenr of the tree block to search
10051  * @level:      tree level of the tree block
10052  * @owner:      owner of the tree block
10053  *
10054  * Return >0 for any error found and output error message
10055  * Return 0 for no error found
10056  */
10057 static int check_tree_block_ref(struct btrfs_root *root,
10058                                 struct extent_buffer *eb, u64 bytenr,
10059                                 int level, u64 owner)
10060 {
10061         struct btrfs_key key;
10062         struct btrfs_root *extent_root = root->fs_info->extent_root;
10063         struct btrfs_path path;
10064         struct btrfs_extent_item *ei;
10065         struct btrfs_extent_inline_ref *iref;
10066         struct extent_buffer *leaf;
10067         unsigned long end;
10068         unsigned long ptr;
10069         int slot;
10070         int skinny_level;
10071         int type;
10072         u32 nodesize = root->fs_info->nodesize;
10073         u32 item_size;
10074         u64 offset;
10075         int tree_reloc_root = 0;
10076         int found_ref = 0;
10077         int err = 0;
10078         int ret;
10079
10080         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10081             btrfs_header_bytenr(root->node) == bytenr)
10082                 tree_reloc_root = 1;
10083
10084         btrfs_init_path(&path);
10085         key.objectid = bytenr;
10086         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10087                 key.type = BTRFS_METADATA_ITEM_KEY;
10088         else
10089                 key.type = BTRFS_EXTENT_ITEM_KEY;
10090         key.offset = (u64)-1;
10091
10092         /* Search for the backref in extent tree */
10093         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10094         if (ret < 0) {
10095                 err |= BACKREF_MISSING;
10096                 goto out;
10097         }
10098         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10099         if (ret) {
10100                 err |= BACKREF_MISSING;
10101                 goto out;
10102         }
10103
10104         leaf = path.nodes[0];
10105         slot = path.slots[0];
10106         btrfs_item_key_to_cpu(leaf, &key, slot);
10107
10108         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10109
10110         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10111                 skinny_level = (int)key.offset;
10112                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10113         } else {
10114                 struct btrfs_tree_block_info *info;
10115
10116                 info = (struct btrfs_tree_block_info *)(ei + 1);
10117                 skinny_level = btrfs_tree_block_level(leaf, info);
10118                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10119         }
10120
10121         if (eb) {
10122                 u64 header_gen;
10123                 u64 extent_gen;
10124
10125                 if (!(btrfs_extent_flags(leaf, ei) &
10126                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10127                         error(
10128                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10129                                 key.objectid, nodesize,
10130                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10131                         err = BACKREF_MISMATCH;
10132                 }
10133                 header_gen = btrfs_header_generation(eb);
10134                 extent_gen = btrfs_extent_generation(leaf, ei);
10135                 if (header_gen != extent_gen) {
10136                         error(
10137         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10138                                 key.objectid, nodesize, header_gen,
10139                                 extent_gen);
10140                         err = BACKREF_MISMATCH;
10141                 }
10142                 if (level != skinny_level) {
10143                         error(
10144                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10145                                 key.objectid, nodesize, level, skinny_level);
10146                         err = BACKREF_MISMATCH;
10147                 }
10148                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10149                         error(
10150                         "extent[%llu %u] is referred by other roots than %llu",
10151                                 key.objectid, nodesize, root->objectid);
10152                         err = BACKREF_MISMATCH;
10153                 }
10154         }
10155
10156         /*
10157          * Iterate the extent/metadata item to find the exact backref
10158          */
10159         item_size = btrfs_item_size_nr(leaf, slot);
10160         ptr = (unsigned long)iref;
10161         end = (unsigned long)ei + item_size;
10162         while (ptr < end) {
10163                 iref = (struct btrfs_extent_inline_ref *)ptr;
10164                 type = btrfs_extent_inline_ref_type(leaf, iref);
10165                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10166
10167                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10168                         (offset == root->objectid || offset == owner)) {
10169                         found_ref = 1;
10170                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10171                         /*
10172                          * Backref of tree reloc root points to itself, no need
10173                          * to check backref any more.
10174                          */
10175                         if (tree_reloc_root)
10176                                 found_ref = 1;
10177                         else
10178                         /* Check if the backref points to valid referencer */
10179                                 found_ref = !check_tree_block_ref(root, NULL,
10180                                                 offset, level + 1, owner);
10181                 }
10182
10183                 if (found_ref)
10184                         break;
10185                 ptr += btrfs_extent_inline_ref_size(type);
10186         }
10187
10188         /*
10189          * Inlined extent item doesn't have what we need, check
10190          * TREE_BLOCK_REF_KEY
10191          */
10192         if (!found_ref) {
10193                 btrfs_release_path(&path);
10194                 key.objectid = bytenr;
10195                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10196                 key.offset = root->objectid;
10197
10198                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10199                 if (!ret)
10200                         found_ref = 1;
10201         }
10202         if (!found_ref)
10203                 err |= BACKREF_MISSING;
10204 out:
10205         btrfs_release_path(&path);
10206         if (eb && (err & BACKREF_MISSING))
10207                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10208                         bytenr, nodesize, owner, level);
10209         return err;
10210 }
10211
10212 /*
10213  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10214  *
10215  * Return >0 any error found and output error message
10216  * Return 0 for no error found
10217  */
10218 static int check_extent_data_item(struct btrfs_root *root,
10219                                   struct extent_buffer *eb, int slot)
10220 {
10221         struct btrfs_file_extent_item *fi;
10222         struct btrfs_path path;
10223         struct btrfs_root *extent_root = root->fs_info->extent_root;
10224         struct btrfs_key fi_key;
10225         struct btrfs_key dbref_key;
10226         struct extent_buffer *leaf;
10227         struct btrfs_extent_item *ei;
10228         struct btrfs_extent_inline_ref *iref;
10229         struct btrfs_extent_data_ref *dref;
10230         u64 owner;
10231         u64 disk_bytenr;
10232         u64 disk_num_bytes;
10233         u64 extent_num_bytes;
10234         u64 extent_flags;
10235         u32 item_size;
10236         unsigned long end;
10237         unsigned long ptr;
10238         int type;
10239         u64 ref_root;
10240         int found_dbackref = 0;
10241         int err = 0;
10242         int ret;
10243
10244         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10245         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10246
10247         /* Nothing to check for hole and inline data extents */
10248         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10249             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10250                 return 0;
10251
10252         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10253         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10254         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10255
10256         /* Check unaligned disk_num_bytes and num_bytes */
10257         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10258                 error(
10259 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10260                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10261                         root->fs_info->sectorsize);
10262                 err |= BYTES_UNALIGNED;
10263         } else {
10264                 data_bytes_allocated += disk_num_bytes;
10265         }
10266         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10267                 error(
10268 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10269                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10270                         root->fs_info->sectorsize);
10271                 err |= BYTES_UNALIGNED;
10272         } else {
10273                 data_bytes_referenced += extent_num_bytes;
10274         }
10275         owner = btrfs_header_owner(eb);
10276
10277         /* Check the extent item of the file extent in extent tree */
10278         btrfs_init_path(&path);
10279         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10280         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10281         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10282
10283         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10284         if (ret)
10285                 goto out;
10286
10287         leaf = path.nodes[0];
10288         slot = path.slots[0];
10289         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10290
10291         extent_flags = btrfs_extent_flags(leaf, ei);
10292
10293         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10294                 error(
10295                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10296                     disk_bytenr, disk_num_bytes,
10297                     BTRFS_EXTENT_FLAG_DATA);
10298                 err |= BACKREF_MISMATCH;
10299         }
10300
10301         /* Check data backref inside that extent item */
10302         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10303         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10304         ptr = (unsigned long)iref;
10305         end = (unsigned long)ei + item_size;
10306         while (ptr < end) {
10307                 iref = (struct btrfs_extent_inline_ref *)ptr;
10308                 type = btrfs_extent_inline_ref_type(leaf, iref);
10309                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10310
10311                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10312                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10313                         if (ref_root == owner || ref_root == root->objectid)
10314                                 found_dbackref = 1;
10315                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10316                         found_dbackref = !check_tree_block_ref(root, NULL,
10317                                 btrfs_extent_inline_ref_offset(leaf, iref),
10318                                 0, owner);
10319                 }
10320
10321                 if (found_dbackref)
10322                         break;
10323                 ptr += btrfs_extent_inline_ref_size(type);
10324         }
10325
10326         if (!found_dbackref) {
10327                 btrfs_release_path(&path);
10328
10329                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10330                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10331                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10332                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10333                                 fi_key.objectid, fi_key.offset);
10334
10335                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10336                                         &dbref_key, &path, 0, 0);
10337                 if (!ret) {
10338                         found_dbackref = 1;
10339                         goto out;
10340                 }
10341
10342                 btrfs_release_path(&path);
10343
10344                 /*
10345                  * Neither inlined nor EXTENT_DATA_REF found, try
10346                  * SHARED_DATA_REF as last chance.
10347                  */
10348                 dbref_key.objectid = disk_bytenr;
10349                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10350                 dbref_key.offset = eb->start;
10351
10352                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10353                                         &dbref_key, &path, 0, 0);
10354                 if (!ret) {
10355                         found_dbackref = 1;
10356                         goto out;
10357                 }
10358         }
10359
10360 out:
10361         if (!found_dbackref)
10362                 err |= BACKREF_MISSING;
10363         btrfs_release_path(&path);
10364         if (err & BACKREF_MISSING) {
10365                 error("data extent[%llu %llu] backref lost",
10366                       disk_bytenr, disk_num_bytes);
10367         }
10368         return err;
10369 }
10370
10371 /*
10372  * Get real tree block level for the case like shared block
10373  * Return >= 0 as tree level
10374  * Return <0 for error
10375  */
10376 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10377 {
10378         struct extent_buffer *eb;
10379         struct btrfs_path path;
10380         struct btrfs_key key;
10381         struct btrfs_extent_item *ei;
10382         u64 flags;
10383         u64 transid;
10384         u8 backref_level;
10385         u8 header_level;
10386         int ret;
10387
10388         /* Search extent tree for extent generation and level */
10389         key.objectid = bytenr;
10390         key.type = BTRFS_METADATA_ITEM_KEY;
10391         key.offset = (u64)-1;
10392
10393         btrfs_init_path(&path);
10394         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10395         if (ret < 0)
10396                 goto release_out;
10397         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10398         if (ret < 0)
10399                 goto release_out;
10400         if (ret > 0) {
10401                 ret = -ENOENT;
10402                 goto release_out;
10403         }
10404
10405         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10406         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10407                             struct btrfs_extent_item);
10408         flags = btrfs_extent_flags(path.nodes[0], ei);
10409         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10410                 ret = -ENOENT;
10411                 goto release_out;
10412         }
10413
10414         /* Get transid for later read_tree_block() check */
10415         transid = btrfs_extent_generation(path.nodes[0], ei);
10416
10417         /* Get backref level as one source */
10418         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10419                 backref_level = key.offset;
10420         } else {
10421                 struct btrfs_tree_block_info *info;
10422
10423                 info = (struct btrfs_tree_block_info *)(ei + 1);
10424                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10425         }
10426         btrfs_release_path(&path);
10427
10428         /* Get level from tree block as an alternative source */
10429         eb = read_tree_block(fs_info, bytenr, transid);
10430         if (!extent_buffer_uptodate(eb)) {
10431                 free_extent_buffer(eb);
10432                 return -EIO;
10433         }
10434         header_level = btrfs_header_level(eb);
10435         free_extent_buffer(eb);
10436
10437         if (header_level != backref_level)
10438                 return -EIO;
10439         return header_level;
10440
10441 release_out:
10442         btrfs_release_path(&path);
10443         return ret;
10444 }
10445
10446 /*
10447  * Check if a tree block backref is valid (points to a valid tree block)
10448  * if level == -1, level will be resolved
10449  * Return >0 for any error found and print error message
10450  */
10451 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10452                                     u64 bytenr, int level)
10453 {
10454         struct btrfs_root *root;
10455         struct btrfs_key key;
10456         struct btrfs_path path;
10457         struct extent_buffer *eb;
10458         struct extent_buffer *node;
10459         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10460         int err = 0;
10461         int ret;
10462
10463         /* Query level for level == -1 special case */
10464         if (level == -1)
10465                 level = query_tree_block_level(fs_info, bytenr);
10466         if (level < 0) {
10467                 err |= REFERENCER_MISSING;
10468                 goto out;
10469         }
10470
10471         key.objectid = root_id;
10472         key.type = BTRFS_ROOT_ITEM_KEY;
10473         key.offset = (u64)-1;
10474
10475         root = btrfs_read_fs_root(fs_info, &key);
10476         if (IS_ERR(root)) {
10477                 err |= REFERENCER_MISSING;
10478                 goto out;
10479         }
10480
10481         /* Read out the tree block to get item/node key */
10482         eb = read_tree_block(fs_info, bytenr, 0);
10483         if (!extent_buffer_uptodate(eb)) {
10484                 err |= REFERENCER_MISSING;
10485                 free_extent_buffer(eb);
10486                 goto out;
10487         }
10488
10489         /* Empty tree, no need to check key */
10490         if (!btrfs_header_nritems(eb) && !level) {
10491                 free_extent_buffer(eb);
10492                 goto out;
10493         }
10494
10495         if (level)
10496                 btrfs_node_key_to_cpu(eb, &key, 0);
10497         else
10498                 btrfs_item_key_to_cpu(eb, &key, 0);
10499
10500         free_extent_buffer(eb);
10501
10502         btrfs_init_path(&path);
10503         path.lowest_level = level;
10504         /* Search with the first key, to ensure we can reach it */
10505         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10506         if (ret < 0) {
10507                 err |= REFERENCER_MISSING;
10508                 goto release_out;
10509         }
10510
10511         node = path.nodes[level];
10512         if (btrfs_header_bytenr(node) != bytenr) {
10513                 error(
10514         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10515                         bytenr, nodesize, bytenr,
10516                         btrfs_header_bytenr(node));
10517                 err |= REFERENCER_MISMATCH;
10518         }
10519         if (btrfs_header_level(node) != level) {
10520                 error(
10521         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10522                         bytenr, nodesize, level,
10523                         btrfs_header_level(node));
10524                 err |= REFERENCER_MISMATCH;
10525         }
10526
10527 release_out:
10528         btrfs_release_path(&path);
10529 out:
10530         if (err & REFERENCER_MISSING) {
10531                 if (level < 0)
10532                         error("extent [%llu %d] lost referencer (owner: %llu)",
10533                                 bytenr, nodesize, root_id);
10534                 else
10535                         error(
10536                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10537                                 bytenr, nodesize, root_id, level);
10538         }
10539
10540         return err;
10541 }
10542
10543 /*
10544  * Check if tree block @eb is tree reloc root.
10545  * Return 0 if it's not or any problem happens
10546  * Return 1 if it's a tree reloc root
10547  */
10548 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10549                                  struct extent_buffer *eb)
10550 {
10551         struct btrfs_root *tree_reloc_root;
10552         struct btrfs_key key;
10553         u64 bytenr = btrfs_header_bytenr(eb);
10554         u64 owner = btrfs_header_owner(eb);
10555         int ret = 0;
10556
10557         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10558         key.offset = owner;
10559         key.type = BTRFS_ROOT_ITEM_KEY;
10560
10561         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10562         if (IS_ERR(tree_reloc_root))
10563                 return 0;
10564
10565         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10566                 ret = 1;
10567         btrfs_free_fs_root(tree_reloc_root);
10568         return ret;
10569 }
10570
10571 /*
10572  * Check referencer for shared block backref
10573  * If level == -1, this function will resolve the level.
10574  */
10575 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10576                                      u64 parent, u64 bytenr, int level)
10577 {
10578         struct extent_buffer *eb;
10579         u32 nr;
10580         int found_parent = 0;
10581         int i;
10582
10583         eb = read_tree_block(fs_info, parent, 0);
10584         if (!extent_buffer_uptodate(eb))
10585                 goto out;
10586
10587         if (level == -1)
10588                 level = query_tree_block_level(fs_info, bytenr);
10589         if (level < 0)
10590                 goto out;
10591
10592         /* It's possible it's a tree reloc root */
10593         if (parent == bytenr) {
10594                 if (is_tree_reloc_root(fs_info, eb))
10595                         found_parent = 1;
10596                 goto out;
10597         }
10598
10599         if (level + 1 != btrfs_header_level(eb))
10600                 goto out;
10601
10602         nr = btrfs_header_nritems(eb);
10603         for (i = 0; i < nr; i++) {
10604                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10605                         found_parent = 1;
10606                         break;
10607                 }
10608         }
10609 out:
10610         free_extent_buffer(eb);
10611         if (!found_parent) {
10612                 error(
10613         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10614                         bytenr, fs_info->nodesize, parent, level);
10615                 return REFERENCER_MISSING;
10616         }
10617         return 0;
10618 }
10619
10620 /*
10621  * Check referencer for normal (inlined) data ref
10622  * If len == 0, it will be resolved by searching in extent tree
10623  */
10624 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10625                                      u64 root_id, u64 objectid, u64 offset,
10626                                      u64 bytenr, u64 len, u32 count)
10627 {
10628         struct btrfs_root *root;
10629         struct btrfs_root *extent_root = fs_info->extent_root;
10630         struct btrfs_key key;
10631         struct btrfs_path path;
10632         struct extent_buffer *leaf;
10633         struct btrfs_file_extent_item *fi;
10634         u32 found_count = 0;
10635         int slot;
10636         int ret = 0;
10637
10638         if (!len) {
10639                 key.objectid = bytenr;
10640                 key.type = BTRFS_EXTENT_ITEM_KEY;
10641                 key.offset = (u64)-1;
10642
10643                 btrfs_init_path(&path);
10644                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10645                 if (ret < 0)
10646                         goto out;
10647                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10648                 if (ret)
10649                         goto out;
10650                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10651                 if (key.objectid != bytenr ||
10652                     key.type != BTRFS_EXTENT_ITEM_KEY)
10653                         goto out;
10654                 len = key.offset;
10655                 btrfs_release_path(&path);
10656         }
10657         key.objectid = root_id;
10658         key.type = BTRFS_ROOT_ITEM_KEY;
10659         key.offset = (u64)-1;
10660         btrfs_init_path(&path);
10661
10662         root = btrfs_read_fs_root(fs_info, &key);
10663         if (IS_ERR(root))
10664                 goto out;
10665
10666         key.objectid = objectid;
10667         key.type = BTRFS_EXTENT_DATA_KEY;
10668         /*
10669          * It can be nasty as data backref offset is
10670          * file offset - file extent offset, which is smaller or
10671          * equal to original backref offset.  The only special case is
10672          * overflow.  So we need to special check and do further search.
10673          */
10674         key.offset = offset & (1ULL << 63) ? 0 : offset;
10675
10676         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10677         if (ret < 0)
10678                 goto out;
10679
10680         /*
10681          * Search afterwards to get correct one
10682          * NOTE: As we must do a comprehensive check on the data backref to
10683          * make sure the dref count also matches, we must iterate all file
10684          * extents for that inode.
10685          */
10686         while (1) {
10687                 leaf = path.nodes[0];
10688                 slot = path.slots[0];
10689
10690                 if (slot >= btrfs_header_nritems(leaf))
10691                         goto next;
10692                 btrfs_item_key_to_cpu(leaf, &key, slot);
10693                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10694                         break;
10695                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10696                 /*
10697                  * Except normal disk bytenr and disk num bytes, we still
10698                  * need to do extra check on dbackref offset as
10699                  * dbackref offset = file_offset - file_extent_offset
10700                  */
10701                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10702                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10703                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10704                     offset)
10705                         found_count++;
10706
10707 next:
10708                 ret = btrfs_next_item(root, &path);
10709                 if (ret)
10710                         break;
10711         }
10712 out:
10713         btrfs_release_path(&path);
10714         if (found_count != count) {
10715                 error(
10716 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10717                         bytenr, len, root_id, objectid, offset, count, found_count);
10718                 return REFERENCER_MISSING;
10719         }
10720         return 0;
10721 }
10722
10723 /*
10724  * Check if the referencer of a shared data backref exists
10725  */
10726 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10727                                      u64 parent, u64 bytenr)
10728 {
10729         struct extent_buffer *eb;
10730         struct btrfs_key key;
10731         struct btrfs_file_extent_item *fi;
10732         u32 nr;
10733         int found_parent = 0;
10734         int i;
10735
10736         eb = read_tree_block(fs_info, parent, 0);
10737         if (!extent_buffer_uptodate(eb))
10738                 goto out;
10739
10740         nr = btrfs_header_nritems(eb);
10741         for (i = 0; i < nr; i++) {
10742                 btrfs_item_key_to_cpu(eb, &key, i);
10743                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10744                         continue;
10745
10746                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10747                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10748                         continue;
10749
10750                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10751                         found_parent = 1;
10752                         break;
10753                 }
10754         }
10755
10756 out:
10757         free_extent_buffer(eb);
10758         if (!found_parent) {
10759                 error("shared extent %llu referencer lost (parent: %llu)",
10760                         bytenr, parent);
10761                 return REFERENCER_MISSING;
10762         }
10763         return 0;
10764 }
10765
10766 /*
10767  * This function will check a given extent item, including its backref and
10768  * itself (like crossing stripe boundary and type)
10769  *
10770  * Since we don't use extent_record anymore, introduce new error bit
10771  */
10772 static int check_extent_item(struct btrfs_fs_info *fs_info,
10773                              struct extent_buffer *eb, int slot)
10774 {
10775         struct btrfs_extent_item *ei;
10776         struct btrfs_extent_inline_ref *iref;
10777         struct btrfs_extent_data_ref *dref;
10778         unsigned long end;
10779         unsigned long ptr;
10780         int type;
10781         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10782         u32 item_size = btrfs_item_size_nr(eb, slot);
10783         u64 flags;
10784         u64 offset;
10785         int metadata = 0;
10786         int level;
10787         struct btrfs_key key;
10788         int ret;
10789         int err = 0;
10790
10791         btrfs_item_key_to_cpu(eb, &key, slot);
10792         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10793                 bytes_used += key.offset;
10794         else
10795                 bytes_used += nodesize;
10796
10797         if (item_size < sizeof(*ei)) {
10798                 /*
10799                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10800                  * old thing when on disk format is still un-determined.
10801                  * No need to care about it anymore
10802                  */
10803                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10804                 return -ENOTTY;
10805         }
10806
10807         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10808         flags = btrfs_extent_flags(eb, ei);
10809
10810         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10811                 metadata = 1;
10812         if (metadata && check_crossing_stripes(global_info, key.objectid,
10813                                                eb->len)) {
10814                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10815                       key.objectid, key.objectid + nodesize);
10816                 err |= CROSSING_STRIPE_BOUNDARY;
10817         }
10818
10819         ptr = (unsigned long)(ei + 1);
10820
10821         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10822                 /* Old EXTENT_ITEM metadata */
10823                 struct btrfs_tree_block_info *info;
10824
10825                 info = (struct btrfs_tree_block_info *)ptr;
10826                 level = btrfs_tree_block_level(eb, info);
10827                 ptr += sizeof(struct btrfs_tree_block_info);
10828         } else {
10829                 /* New METADATA_ITEM */
10830                 level = key.offset;
10831         }
10832         end = (unsigned long)ei + item_size;
10833
10834 next:
10835         /* Reached extent item end normally */
10836         if (ptr == end)
10837                 goto out;
10838
10839         /* Beyond extent item end, wrong item size */
10840         if (ptr > end) {
10841                 err |= ITEM_SIZE_MISMATCH;
10842                 error("extent item at bytenr %llu slot %d has wrong size",
10843                         eb->start, slot);
10844                 goto out;
10845         }
10846
10847         /* Now check every backref in this extent item */
10848         iref = (struct btrfs_extent_inline_ref *)ptr;
10849         type = btrfs_extent_inline_ref_type(eb, iref);
10850         offset = btrfs_extent_inline_ref_offset(eb, iref);
10851         switch (type) {
10852         case BTRFS_TREE_BLOCK_REF_KEY:
10853                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10854                                                level);
10855                 err |= ret;
10856                 break;
10857         case BTRFS_SHARED_BLOCK_REF_KEY:
10858                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10859                                                  level);
10860                 err |= ret;
10861                 break;
10862         case BTRFS_EXTENT_DATA_REF_KEY:
10863                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10864                 ret = check_extent_data_backref(fs_info,
10865                                 btrfs_extent_data_ref_root(eb, dref),
10866                                 btrfs_extent_data_ref_objectid(eb, dref),
10867                                 btrfs_extent_data_ref_offset(eb, dref),
10868                                 key.objectid, key.offset,
10869                                 btrfs_extent_data_ref_count(eb, dref));
10870                 err |= ret;
10871                 break;
10872         case BTRFS_SHARED_DATA_REF_KEY:
10873                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10874                 err |= ret;
10875                 break;
10876         default:
10877                 error("extent[%llu %d %llu] has unknown ref type: %d",
10878                         key.objectid, key.type, key.offset, type);
10879                 err |= UNKNOWN_TYPE;
10880                 goto out;
10881         }
10882
10883         ptr += btrfs_extent_inline_ref_size(type);
10884         goto next;
10885
10886 out:
10887         return err;
10888 }
10889
10890 /*
10891  * Check if a dev extent item is referred correctly by its chunk
10892  */
10893 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10894                                  struct extent_buffer *eb, int slot)
10895 {
10896         struct btrfs_root *chunk_root = fs_info->chunk_root;
10897         struct btrfs_dev_extent *ptr;
10898         struct btrfs_path path;
10899         struct btrfs_key chunk_key;
10900         struct btrfs_key devext_key;
10901         struct btrfs_chunk *chunk;
10902         struct extent_buffer *l;
10903         int num_stripes;
10904         u64 length;
10905         int i;
10906         int found_chunk = 0;
10907         int ret;
10908
10909         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10910         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10911         length = btrfs_dev_extent_length(eb, ptr);
10912
10913         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10914         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10915         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10916
10917         btrfs_init_path(&path);
10918         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10919         if (ret)
10920                 goto out;
10921
10922         l = path.nodes[0];
10923         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10924         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10925                                       chunk_key.offset);
10926         if (ret < 0)
10927                 goto out;
10928
10929         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10930                 goto out;
10931
10932         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10933         for (i = 0; i < num_stripes; i++) {
10934                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10935                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10936
10937                 if (devid == devext_key.objectid &&
10938                     offset == devext_key.offset) {
10939                         found_chunk = 1;
10940                         break;
10941                 }
10942         }
10943 out:
10944         btrfs_release_path(&path);
10945         if (!found_chunk) {
10946                 error(
10947                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10948                         devext_key.objectid, devext_key.offset, length);
10949                 return REFERENCER_MISSING;
10950         }
10951         return 0;
10952 }
10953
10954 /*
10955  * Check if the used space is correct with the dev item
10956  */
10957 static int check_dev_item(struct btrfs_fs_info *fs_info,
10958                           struct extent_buffer *eb, int slot)
10959 {
10960         struct btrfs_root *dev_root = fs_info->dev_root;
10961         struct btrfs_dev_item *dev_item;
10962         struct btrfs_path path;
10963         struct btrfs_key key;
10964         struct btrfs_dev_extent *ptr;
10965         u64 dev_id;
10966         u64 used;
10967         u64 total = 0;
10968         int ret;
10969
10970         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10971         dev_id = btrfs_device_id(eb, dev_item);
10972         used = btrfs_device_bytes_used(eb, dev_item);
10973
10974         key.objectid = dev_id;
10975         key.type = BTRFS_DEV_EXTENT_KEY;
10976         key.offset = 0;
10977
10978         btrfs_init_path(&path);
10979         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10980         if (ret < 0) {
10981                 btrfs_item_key_to_cpu(eb, &key, slot);
10982                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10983                         key.objectid, key.type, key.offset);
10984                 btrfs_release_path(&path);
10985                 return REFERENCER_MISSING;
10986         }
10987
10988         /* Iterate dev_extents to calculate the used space of a device */
10989         while (1) {
10990                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10991                         goto next;
10992
10993                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10994                 if (key.objectid > dev_id)
10995                         break;
10996                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10997                         goto next;
10998
10999                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11000                                      struct btrfs_dev_extent);
11001                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11002 next:
11003                 ret = btrfs_next_item(dev_root, &path);
11004                 if (ret)
11005                         break;
11006         }
11007         btrfs_release_path(&path);
11008
11009         if (used != total) {
11010                 btrfs_item_key_to_cpu(eb, &key, slot);
11011                 error(
11012 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11013                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11014                         BTRFS_DEV_EXTENT_KEY, dev_id);
11015                 return ACCOUNTING_MISMATCH;
11016         }
11017         return 0;
11018 }
11019
11020 /*
11021  * Check a block group item with its referener (chunk) and its used space
11022  * with extent/metadata item
11023  */
11024 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11025                                   struct extent_buffer *eb, int slot)
11026 {
11027         struct btrfs_root *extent_root = fs_info->extent_root;
11028         struct btrfs_root *chunk_root = fs_info->chunk_root;
11029         struct btrfs_block_group_item *bi;
11030         struct btrfs_block_group_item bg_item;
11031         struct btrfs_path path;
11032         struct btrfs_key bg_key;
11033         struct btrfs_key chunk_key;
11034         struct btrfs_key extent_key;
11035         struct btrfs_chunk *chunk;
11036         struct extent_buffer *leaf;
11037         struct btrfs_extent_item *ei;
11038         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11039         u64 flags;
11040         u64 bg_flags;
11041         u64 used;
11042         u64 total = 0;
11043         int ret;
11044         int err = 0;
11045
11046         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11047         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11048         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11049         used = btrfs_block_group_used(&bg_item);
11050         bg_flags = btrfs_block_group_flags(&bg_item);
11051
11052         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11053         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11054         chunk_key.offset = bg_key.objectid;
11055
11056         btrfs_init_path(&path);
11057         /* Search for the referencer chunk */
11058         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11059         if (ret) {
11060                 error(
11061                 "block group[%llu %llu] did not find the related chunk item",
11062                         bg_key.objectid, bg_key.offset);
11063                 err |= REFERENCER_MISSING;
11064         } else {
11065                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11066                                         struct btrfs_chunk);
11067                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11068                                                 bg_key.offset) {
11069                         error(
11070         "block group[%llu %llu] related chunk item length does not match",
11071                                 bg_key.objectid, bg_key.offset);
11072                         err |= REFERENCER_MISMATCH;
11073                 }
11074         }
11075         btrfs_release_path(&path);
11076
11077         /* Search from the block group bytenr */
11078         extent_key.objectid = bg_key.objectid;
11079         extent_key.type = 0;
11080         extent_key.offset = 0;
11081
11082         btrfs_init_path(&path);
11083         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11084         if (ret < 0)
11085                 goto out;
11086
11087         /* Iterate extent tree to account used space */
11088         while (1) {
11089                 leaf = path.nodes[0];
11090
11091                 /* Search slot can point to the last item beyond leaf nritems */
11092                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11093                         goto next;
11094
11095                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11096                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11097                         break;
11098
11099                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11100                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11101                         goto next;
11102                 if (extent_key.objectid < bg_key.objectid)
11103                         goto next;
11104
11105                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11106                         total += nodesize;
11107                 else
11108                         total += extent_key.offset;
11109
11110                 ei = btrfs_item_ptr(leaf, path.slots[0],
11111                                     struct btrfs_extent_item);
11112                 flags = btrfs_extent_flags(leaf, ei);
11113                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11114                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11115                                 error(
11116                         "bad extent[%llu, %llu) type mismatch with chunk",
11117                                         extent_key.objectid,
11118                                         extent_key.objectid + extent_key.offset);
11119                                 err |= CHUNK_TYPE_MISMATCH;
11120                         }
11121                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11122                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11123                                     BTRFS_BLOCK_GROUP_METADATA))) {
11124                                 error(
11125                         "bad extent[%llu, %llu) type mismatch with chunk",
11126                                         extent_key.objectid,
11127                                         extent_key.objectid + nodesize);
11128                                 err |= CHUNK_TYPE_MISMATCH;
11129                         }
11130                 }
11131 next:
11132                 ret = btrfs_next_item(extent_root, &path);
11133                 if (ret)
11134                         break;
11135         }
11136
11137 out:
11138         btrfs_release_path(&path);
11139
11140         if (total != used) {
11141                 error(
11142                 "block group[%llu %llu] used %llu but extent items used %llu",
11143                         bg_key.objectid, bg_key.offset, used, total);
11144                 err |= ACCOUNTING_MISMATCH;
11145         }
11146         return err;
11147 }
11148
11149 /*
11150  * Check a chunk item.
11151  * Including checking all referred dev_extents and block group
11152  */
11153 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11154                             struct extent_buffer *eb, int slot)
11155 {
11156         struct btrfs_root *extent_root = fs_info->extent_root;
11157         struct btrfs_root *dev_root = fs_info->dev_root;
11158         struct btrfs_path path;
11159         struct btrfs_key chunk_key;
11160         struct btrfs_key bg_key;
11161         struct btrfs_key devext_key;
11162         struct btrfs_chunk *chunk;
11163         struct extent_buffer *leaf;
11164         struct btrfs_block_group_item *bi;
11165         struct btrfs_block_group_item bg_item;
11166         struct btrfs_dev_extent *ptr;
11167         u64 length;
11168         u64 chunk_end;
11169         u64 stripe_len;
11170         u64 type;
11171         int num_stripes;
11172         u64 offset;
11173         u64 objectid;
11174         int i;
11175         int ret;
11176         int err = 0;
11177
11178         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11179         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11180         length = btrfs_chunk_length(eb, chunk);
11181         chunk_end = chunk_key.offset + length;
11182         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11183                                       chunk_key.offset);
11184         if (ret < 0) {
11185                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11186                         chunk_end);
11187                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11188                 goto out;
11189         }
11190         type = btrfs_chunk_type(eb, chunk);
11191
11192         bg_key.objectid = chunk_key.offset;
11193         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11194         bg_key.offset = length;
11195
11196         btrfs_init_path(&path);
11197         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11198         if (ret) {
11199                 error(
11200                 "chunk[%llu %llu) did not find the related block group item",
11201                         chunk_key.offset, chunk_end);
11202                 err |= REFERENCER_MISSING;
11203         } else{
11204                 leaf = path.nodes[0];
11205                 bi = btrfs_item_ptr(leaf, path.slots[0],
11206                                     struct btrfs_block_group_item);
11207                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11208                                    sizeof(bg_item));
11209                 if (btrfs_block_group_flags(&bg_item) != type) {
11210                         error(
11211 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11212                                 chunk_key.offset, chunk_end, type,
11213                                 btrfs_block_group_flags(&bg_item));
11214                         err |= REFERENCER_MISSING;
11215                 }
11216         }
11217
11218         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11219         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11220         for (i = 0; i < num_stripes; i++) {
11221                 btrfs_release_path(&path);
11222                 btrfs_init_path(&path);
11223                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11224                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11225                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11226
11227                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11228                                         0, 0);
11229                 if (ret)
11230                         goto not_match_dev;
11231
11232                 leaf = path.nodes[0];
11233                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11234                                      struct btrfs_dev_extent);
11235                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11236                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11237                 if (objectid != chunk_key.objectid ||
11238                     offset != chunk_key.offset ||
11239                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11240                         goto not_match_dev;
11241                 continue;
11242 not_match_dev:
11243                 err |= BACKREF_MISSING;
11244                 error(
11245                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11246                         chunk_key.objectid, chunk_end, i);
11247                 continue;
11248         }
11249         btrfs_release_path(&path);
11250 out:
11251         return err;
11252 }
11253
11254 /*
11255  * Main entry function to check known items and update related accounting info
11256  */
11257 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11258 {
11259         struct btrfs_fs_info *fs_info = root->fs_info;
11260         struct btrfs_key key;
11261         int slot = 0;
11262         int type;
11263         struct btrfs_extent_data_ref *dref;
11264         int ret;
11265         int err = 0;
11266
11267 next:
11268         btrfs_item_key_to_cpu(eb, &key, slot);
11269         type = key.type;
11270
11271         switch (type) {
11272         case BTRFS_EXTENT_DATA_KEY:
11273                 ret = check_extent_data_item(root, eb, slot);
11274                 err |= ret;
11275                 break;
11276         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11277                 ret = check_block_group_item(fs_info, eb, slot);
11278                 err |= ret;
11279                 break;
11280         case BTRFS_DEV_ITEM_KEY:
11281                 ret = check_dev_item(fs_info, eb, slot);
11282                 err |= ret;
11283                 break;
11284         case BTRFS_CHUNK_ITEM_KEY:
11285                 ret = check_chunk_item(fs_info, eb, slot);
11286                 err |= ret;
11287                 break;
11288         case BTRFS_DEV_EXTENT_KEY:
11289                 ret = check_dev_extent_item(fs_info, eb, slot);
11290                 err |= ret;
11291                 break;
11292         case BTRFS_EXTENT_ITEM_KEY:
11293         case BTRFS_METADATA_ITEM_KEY:
11294                 ret = check_extent_item(fs_info, eb, slot);
11295                 err |= ret;
11296                 break;
11297         case BTRFS_EXTENT_CSUM_KEY:
11298                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11299                 break;
11300         case BTRFS_TREE_BLOCK_REF_KEY:
11301                 ret = check_tree_block_backref(fs_info, key.offset,
11302                                                key.objectid, -1);
11303                 err |= ret;
11304                 break;
11305         case BTRFS_EXTENT_DATA_REF_KEY:
11306                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11307                 ret = check_extent_data_backref(fs_info,
11308                                 btrfs_extent_data_ref_root(eb, dref),
11309                                 btrfs_extent_data_ref_objectid(eb, dref),
11310                                 btrfs_extent_data_ref_offset(eb, dref),
11311                                 key.objectid, 0,
11312                                 btrfs_extent_data_ref_count(eb, dref));
11313                 err |= ret;
11314                 break;
11315         case BTRFS_SHARED_BLOCK_REF_KEY:
11316                 ret = check_shared_block_backref(fs_info, key.offset,
11317                                                  key.objectid, -1);
11318                 err |= ret;
11319                 break;
11320         case BTRFS_SHARED_DATA_REF_KEY:
11321                 ret = check_shared_data_backref(fs_info, key.offset,
11322                                                 key.objectid);
11323                 err |= ret;
11324                 break;
11325         default:
11326                 break;
11327         }
11328
11329         if (++slot < btrfs_header_nritems(eb))
11330                 goto next;
11331
11332         return err;
11333 }
11334
11335 /*
11336  * Helper function for later fs/subvol tree check.  To determine if a tree
11337  * block should be checked.
11338  * This function will ensure only the direct referencer with lowest rootid to
11339  * check a fs/subvolume tree block.
11340  *
11341  * Backref check at extent tree would detect errors like missing subvolume
11342  * tree, so we can do aggressive check to reduce duplicated checks.
11343  */
11344 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11345 {
11346         struct btrfs_root *extent_root = root->fs_info->extent_root;
11347         struct btrfs_key key;
11348         struct btrfs_path path;
11349         struct extent_buffer *leaf;
11350         int slot;
11351         struct btrfs_extent_item *ei;
11352         unsigned long ptr;
11353         unsigned long end;
11354         int type;
11355         u32 item_size;
11356         u64 offset;
11357         struct btrfs_extent_inline_ref *iref;
11358         int ret;
11359
11360         btrfs_init_path(&path);
11361         key.objectid = btrfs_header_bytenr(eb);
11362         key.type = BTRFS_METADATA_ITEM_KEY;
11363         key.offset = (u64)-1;
11364
11365         /*
11366          * Any failure in backref resolving means we can't determine
11367          * whom the tree block belongs to.
11368          * So in that case, we need to check that tree block
11369          */
11370         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11371         if (ret < 0)
11372                 goto need_check;
11373
11374         ret = btrfs_previous_extent_item(extent_root, &path,
11375                                          btrfs_header_bytenr(eb));
11376         if (ret)
11377                 goto need_check;
11378
11379         leaf = path.nodes[0];
11380         slot = path.slots[0];
11381         btrfs_item_key_to_cpu(leaf, &key, slot);
11382         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11383
11384         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11385                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11386         } else {
11387                 struct btrfs_tree_block_info *info;
11388
11389                 info = (struct btrfs_tree_block_info *)(ei + 1);
11390                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11391         }
11392
11393         item_size = btrfs_item_size_nr(leaf, slot);
11394         ptr = (unsigned long)iref;
11395         end = (unsigned long)ei + item_size;
11396         while (ptr < end) {
11397                 iref = (struct btrfs_extent_inline_ref *)ptr;
11398                 type = btrfs_extent_inline_ref_type(leaf, iref);
11399                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11400
11401                 /*
11402                  * We only check the tree block if current root is
11403                  * the lowest referencer of it.
11404                  */
11405                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11406                     offset < root->objectid) {
11407                         btrfs_release_path(&path);
11408                         return 0;
11409                 }
11410
11411                 ptr += btrfs_extent_inline_ref_size(type);
11412         }
11413         /*
11414          * Normally we should also check keyed tree block ref, but that may be
11415          * very time consuming.  Inlined ref should already make us skip a lot
11416          * of refs now.  So skip search keyed tree block ref.
11417          */
11418
11419 need_check:
11420         btrfs_release_path(&path);
11421         return 1;
11422 }
11423
11424 /*
11425  * Traversal function for tree block. We will do:
11426  * 1) Skip shared fs/subvolume tree blocks
11427  * 2) Update related bytes accounting
11428  * 3) Pre-order traversal
11429  */
11430 static int traverse_tree_block(struct btrfs_root *root,
11431                                 struct extent_buffer *node)
11432 {
11433         struct extent_buffer *eb;
11434         struct btrfs_key key;
11435         struct btrfs_key drop_key;
11436         int level;
11437         u64 nr;
11438         int i;
11439         int err = 0;
11440         int ret;
11441
11442         /*
11443          * Skip shared fs/subvolume tree block, in that case they will
11444          * be checked by referencer with lowest rootid
11445          */
11446         if (is_fstree(root->objectid) && !should_check(root, node))
11447                 return 0;
11448
11449         /* Update bytes accounting */
11450         total_btree_bytes += node->len;
11451         if (fs_root_objectid(btrfs_header_owner(node)))
11452                 total_fs_tree_bytes += node->len;
11453         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11454                 total_extent_tree_bytes += node->len;
11455
11456         /* pre-order tranversal, check itself first */
11457         level = btrfs_header_level(node);
11458         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11459                                    btrfs_header_level(node),
11460                                    btrfs_header_owner(node));
11461         err |= ret;
11462         if (err)
11463                 error(
11464         "check %s failed root %llu bytenr %llu level %d, force continue check",
11465                         level ? "node":"leaf", root->objectid,
11466                         btrfs_header_bytenr(node), btrfs_header_level(node));
11467
11468         if (!level) {
11469                 btree_space_waste += btrfs_leaf_free_space(root, node);
11470                 ret = check_leaf_items(root, node);
11471                 err |= ret;
11472                 return err;
11473         }
11474
11475         nr = btrfs_header_nritems(node);
11476         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11477         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11478                 sizeof(struct btrfs_key_ptr);
11479
11480         /* Then check all its children */
11481         for (i = 0; i < nr; i++) {
11482                 u64 blocknr = btrfs_node_blockptr(node, i);
11483
11484                 btrfs_node_key_to_cpu(node, &key, i);
11485                 if (level == root->root_item.drop_level &&
11486                     is_dropped_key(&key, &drop_key))
11487                         continue;
11488
11489                 /*
11490                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11491                  * to call the function itself.
11492                  */
11493                 eb = read_tree_block(root->fs_info, blocknr, 0);
11494                 if (extent_buffer_uptodate(eb)) {
11495                         ret = traverse_tree_block(root, eb);
11496                         err |= ret;
11497                 }
11498                 free_extent_buffer(eb);
11499         }
11500
11501         return err;
11502 }
11503
11504 /*
11505  * Low memory usage version check_chunks_and_extents.
11506  */
11507 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11508 {
11509         struct btrfs_path path;
11510         struct btrfs_key key;
11511         struct btrfs_root *root1;
11512         struct btrfs_root *root;
11513         struct btrfs_root *cur_root;
11514         int err = 0;
11515         int ret;
11516
11517         root = fs_info->fs_root;
11518
11519         root1 = root->fs_info->chunk_root;
11520         ret = traverse_tree_block(root1, root1->node);
11521         err |= ret;
11522
11523         root1 = root->fs_info->tree_root;
11524         ret = traverse_tree_block(root1, root1->node);
11525         err |= ret;
11526
11527         btrfs_init_path(&path);
11528         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11529         key.offset = 0;
11530         key.type = BTRFS_ROOT_ITEM_KEY;
11531
11532         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11533         if (ret) {
11534                 error("cannot find extent treet in tree_root");
11535                 goto out;
11536         }
11537
11538         while (1) {
11539                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11540                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11541                         goto next;
11542                 key.offset = (u64)-1;
11543
11544                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11545                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11546                                         &key);
11547                 else
11548                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11549                 if (IS_ERR(cur_root) || !cur_root) {
11550                         error("failed to read tree: %lld", key.objectid);
11551                         goto next;
11552                 }
11553
11554                 ret = traverse_tree_block(cur_root, cur_root->node);
11555                 err |= ret;
11556
11557                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11558                         btrfs_free_fs_root(cur_root);
11559 next:
11560                 ret = btrfs_next_item(root1, &path);
11561                 if (ret)
11562                         goto out;
11563         }
11564
11565 out:
11566         btrfs_release_path(&path);
11567         return err;
11568 }
11569
11570 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11571 {
11572         int ret;
11573
11574         if (!ctx.progress_enabled)
11575                 fprintf(stderr, "checking extents\n");
11576         if (check_mode == CHECK_MODE_LOWMEM)
11577                 ret = check_chunks_and_extents_v2(fs_info);
11578         else
11579                 ret = check_chunks_and_extents(fs_info);
11580
11581         return ret;
11582 }
11583
11584 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11585                            struct btrfs_root *root, int overwrite)
11586 {
11587         struct extent_buffer *c;
11588         struct extent_buffer *old = root->node;
11589         int level;
11590         int ret;
11591         struct btrfs_disk_key disk_key = {0,0,0};
11592
11593         level = 0;
11594
11595         if (overwrite) {
11596                 c = old;
11597                 extent_buffer_get(c);
11598                 goto init;
11599         }
11600         c = btrfs_alloc_free_block(trans, root,
11601                                    root->fs_info->nodesize,
11602                                    root->root_key.objectid,
11603                                    &disk_key, level, 0, 0);
11604         if (IS_ERR(c)) {
11605                 c = old;
11606                 extent_buffer_get(c);
11607                 overwrite = 1;
11608         }
11609 init:
11610         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11611         btrfs_set_header_level(c, level);
11612         btrfs_set_header_bytenr(c, c->start);
11613         btrfs_set_header_generation(c, trans->transid);
11614         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11615         btrfs_set_header_owner(c, root->root_key.objectid);
11616
11617         write_extent_buffer(c, root->fs_info->fsid,
11618                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11619
11620         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11621                             btrfs_header_chunk_tree_uuid(c),
11622                             BTRFS_UUID_SIZE);
11623
11624         btrfs_mark_buffer_dirty(c);
11625         /*
11626          * this case can happen in the following case:
11627          *
11628          * 1.overwrite previous root.
11629          *
11630          * 2.reinit reloc data root, this is because we skip pin
11631          * down reloc data tree before which means we can allocate
11632          * same block bytenr here.
11633          */
11634         if (old->start == c->start) {
11635                 btrfs_set_root_generation(&root->root_item,
11636                                           trans->transid);
11637                 root->root_item.level = btrfs_header_level(root->node);
11638                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11639                                         &root->root_key, &root->root_item);
11640                 if (ret) {
11641                         free_extent_buffer(c);
11642                         return ret;
11643                 }
11644         }
11645         free_extent_buffer(old);
11646         root->node = c;
11647         add_root_to_dirty_list(root);
11648         return 0;
11649 }
11650
11651 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11652                                 struct extent_buffer *eb, int tree_root)
11653 {
11654         struct extent_buffer *tmp;
11655         struct btrfs_root_item *ri;
11656         struct btrfs_key key;
11657         u64 bytenr;
11658         int level = btrfs_header_level(eb);
11659         int nritems;
11660         int ret;
11661         int i;
11662
11663         /*
11664          * If we have pinned this block before, don't pin it again.
11665          * This can not only avoid forever loop with broken filesystem
11666          * but also give us some speedups.
11667          */
11668         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11669                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11670                 return 0;
11671
11672         btrfs_pin_extent(fs_info, eb->start, eb->len);
11673
11674         nritems = btrfs_header_nritems(eb);
11675         for (i = 0; i < nritems; i++) {
11676                 if (level == 0) {
11677                         btrfs_item_key_to_cpu(eb, &key, i);
11678                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11679                                 continue;
11680                         /* Skip the extent root and reloc roots */
11681                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11682                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11683                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11684                                 continue;
11685                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11686                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11687
11688                         /*
11689                          * If at any point we start needing the real root we
11690                          * will have to build a stump root for the root we are
11691                          * in, but for now this doesn't actually use the root so
11692                          * just pass in extent_root.
11693                          */
11694                         tmp = read_tree_block(fs_info, bytenr, 0);
11695                         if (!extent_buffer_uptodate(tmp)) {
11696                                 fprintf(stderr, "Error reading root block\n");
11697                                 return -EIO;
11698                         }
11699                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11700                         free_extent_buffer(tmp);
11701                         if (ret)
11702                                 return ret;
11703                 } else {
11704                         bytenr = btrfs_node_blockptr(eb, i);
11705
11706                         /* If we aren't the tree root don't read the block */
11707                         if (level == 1 && !tree_root) {
11708                                 btrfs_pin_extent(fs_info, bytenr,
11709                                                 fs_info->nodesize);
11710                                 continue;
11711                         }
11712
11713                         tmp = read_tree_block(fs_info, bytenr, 0);
11714                         if (!extent_buffer_uptodate(tmp)) {
11715                                 fprintf(stderr, "Error reading tree block\n");
11716                                 return -EIO;
11717                         }
11718                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11719                         free_extent_buffer(tmp);
11720                         if (ret)
11721                                 return ret;
11722                 }
11723         }
11724
11725         return 0;
11726 }
11727
11728 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11729 {
11730         int ret;
11731
11732         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11733         if (ret)
11734                 return ret;
11735
11736         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11737 }
11738
11739 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11740 {
11741         struct btrfs_block_group_cache *cache;
11742         struct btrfs_path path;
11743         struct extent_buffer *leaf;
11744         struct btrfs_chunk *chunk;
11745         struct btrfs_key key;
11746         int ret;
11747         u64 start;
11748
11749         btrfs_init_path(&path);
11750         key.objectid = 0;
11751         key.type = BTRFS_CHUNK_ITEM_KEY;
11752         key.offset = 0;
11753         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11754         if (ret < 0) {
11755                 btrfs_release_path(&path);
11756                 return ret;
11757         }
11758
11759         /*
11760          * We do this in case the block groups were screwed up and had alloc
11761          * bits that aren't actually set on the chunks.  This happens with
11762          * restored images every time and could happen in real life I guess.
11763          */
11764         fs_info->avail_data_alloc_bits = 0;
11765         fs_info->avail_metadata_alloc_bits = 0;
11766         fs_info->avail_system_alloc_bits = 0;
11767
11768         /* First we need to create the in-memory block groups */
11769         while (1) {
11770                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11771                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11772                         if (ret < 0) {
11773                                 btrfs_release_path(&path);
11774                                 return ret;
11775                         }
11776                         if (ret) {
11777                                 ret = 0;
11778                                 break;
11779                         }
11780                 }
11781                 leaf = path.nodes[0];
11782                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11783                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11784                         path.slots[0]++;
11785                         continue;
11786                 }
11787
11788                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11789                 btrfs_add_block_group(fs_info, 0,
11790                                       btrfs_chunk_type(leaf, chunk),
11791                                       key.objectid, key.offset,
11792                                       btrfs_chunk_length(leaf, chunk));
11793                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11794                                  key.offset + btrfs_chunk_length(leaf, chunk));
11795                 path.slots[0]++;
11796         }
11797         start = 0;
11798         while (1) {
11799                 cache = btrfs_lookup_first_block_group(fs_info, start);
11800                 if (!cache)
11801                         break;
11802                 cache->cached = 1;
11803                 start = cache->key.objectid + cache->key.offset;
11804         }
11805
11806         btrfs_release_path(&path);
11807         return 0;
11808 }
11809
11810 static int reset_balance(struct btrfs_trans_handle *trans,
11811                          struct btrfs_fs_info *fs_info)
11812 {
11813         struct btrfs_root *root = fs_info->tree_root;
11814         struct btrfs_path path;
11815         struct extent_buffer *leaf;
11816         struct btrfs_key key;
11817         int del_slot, del_nr = 0;
11818         int ret;
11819         int found = 0;
11820
11821         btrfs_init_path(&path);
11822         key.objectid = BTRFS_BALANCE_OBJECTID;
11823         key.type = BTRFS_BALANCE_ITEM_KEY;
11824         key.offset = 0;
11825         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11826         if (ret) {
11827                 if (ret > 0)
11828                         ret = 0;
11829                 if (!ret)
11830                         goto reinit_data_reloc;
11831                 else
11832                         goto out;
11833         }
11834
11835         ret = btrfs_del_item(trans, root, &path);
11836         if (ret)
11837                 goto out;
11838         btrfs_release_path(&path);
11839
11840         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11841         key.type = BTRFS_ROOT_ITEM_KEY;
11842         key.offset = 0;
11843         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11844         if (ret < 0)
11845                 goto out;
11846         while (1) {
11847                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11848                         if (!found)
11849                                 break;
11850
11851                         if (del_nr) {
11852                                 ret = btrfs_del_items(trans, root, &path,
11853                                                       del_slot, del_nr);
11854                                 del_nr = 0;
11855                                 if (ret)
11856                                         goto out;
11857                         }
11858                         key.offset++;
11859                         btrfs_release_path(&path);
11860
11861                         found = 0;
11862                         ret = btrfs_search_slot(trans, root, &key, &path,
11863                                                 -1, 1);
11864                         if (ret < 0)
11865                                 goto out;
11866                         continue;
11867                 }
11868                 found = 1;
11869                 leaf = path.nodes[0];
11870                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11871                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11872                         break;
11873                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11874                         path.slots[0]++;
11875                         continue;
11876                 }
11877                 if (!del_nr) {
11878                         del_slot = path.slots[0];
11879                         del_nr = 1;
11880                 } else {
11881                         del_nr++;
11882                 }
11883                 path.slots[0]++;
11884         }
11885
11886         if (del_nr) {
11887                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11888                 if (ret)
11889                         goto out;
11890         }
11891         btrfs_release_path(&path);
11892
11893 reinit_data_reloc:
11894         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11895         key.type = BTRFS_ROOT_ITEM_KEY;
11896         key.offset = (u64)-1;
11897         root = btrfs_read_fs_root(fs_info, &key);
11898         if (IS_ERR(root)) {
11899                 fprintf(stderr, "Error reading data reloc tree\n");
11900                 ret = PTR_ERR(root);
11901                 goto out;
11902         }
11903         record_root_in_trans(trans, root);
11904         ret = btrfs_fsck_reinit_root(trans, root, 0);
11905         if (ret)
11906                 goto out;
11907         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11908 out:
11909         btrfs_release_path(&path);
11910         return ret;
11911 }
11912
11913 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11914                               struct btrfs_fs_info *fs_info)
11915 {
11916         u64 start = 0;
11917         int ret;
11918
11919         /*
11920          * The only reason we don't do this is because right now we're just
11921          * walking the trees we find and pinning down their bytes, we don't look
11922          * at any of the leaves.  In order to do mixed groups we'd have to check
11923          * the leaves of any fs roots and pin down the bytes for any file
11924          * extents we find.  Not hard but why do it if we don't have to?
11925          */
11926         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11927                 fprintf(stderr, "We don't support re-initing the extent tree "
11928                         "for mixed block groups yet, please notify a btrfs "
11929                         "developer you want to do this so they can add this "
11930                         "functionality.\n");
11931                 return -EINVAL;
11932         }
11933
11934         /*
11935          * first we need to walk all of the trees except the extent tree and pin
11936          * down the bytes that are in use so we don't overwrite any existing
11937          * metadata.
11938          */
11939         ret = pin_metadata_blocks(fs_info);
11940         if (ret) {
11941                 fprintf(stderr, "error pinning down used bytes\n");
11942                 return ret;
11943         }
11944
11945         /*
11946          * Need to drop all the block groups since we're going to recreate all
11947          * of them again.
11948          */
11949         btrfs_free_block_groups(fs_info);
11950         ret = reset_block_groups(fs_info);
11951         if (ret) {
11952                 fprintf(stderr, "error resetting the block groups\n");
11953                 return ret;
11954         }
11955
11956         /* Ok we can allocate now, reinit the extent root */
11957         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11958         if (ret) {
11959                 fprintf(stderr, "extent root initialization failed\n");
11960                 /*
11961                  * When the transaction code is updated we should end the
11962                  * transaction, but for now progs only knows about commit so
11963                  * just return an error.
11964                  */
11965                 return ret;
11966         }
11967
11968         /*
11969          * Now we have all the in-memory block groups setup so we can make
11970          * allocations properly, and the metadata we care about is safe since we
11971          * pinned all of it above.
11972          */
11973         while (1) {
11974                 struct btrfs_block_group_cache *cache;
11975
11976                 cache = btrfs_lookup_first_block_group(fs_info, start);
11977                 if (!cache)
11978                         break;
11979                 start = cache->key.objectid + cache->key.offset;
11980                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11981                                         &cache->key, &cache->item,
11982                                         sizeof(cache->item));
11983                 if (ret) {
11984                         fprintf(stderr, "Error adding block group\n");
11985                         return ret;
11986                 }
11987                 btrfs_extent_post_op(trans, fs_info->extent_root);
11988         }
11989
11990         ret = reset_balance(trans, fs_info);
11991         if (ret)
11992                 fprintf(stderr, "error resetting the pending balance\n");
11993
11994         return ret;
11995 }
11996
11997 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11998 {
11999         struct btrfs_path path;
12000         struct btrfs_trans_handle *trans;
12001         struct btrfs_key key;
12002         int ret;
12003
12004         printf("Recowing metadata block %llu\n", eb->start);
12005         key.objectid = btrfs_header_owner(eb);
12006         key.type = BTRFS_ROOT_ITEM_KEY;
12007         key.offset = (u64)-1;
12008
12009         root = btrfs_read_fs_root(root->fs_info, &key);
12010         if (IS_ERR(root)) {
12011                 fprintf(stderr, "Couldn't find owner root %llu\n",
12012                         key.objectid);
12013                 return PTR_ERR(root);
12014         }
12015
12016         trans = btrfs_start_transaction(root, 1);
12017         if (IS_ERR(trans))
12018                 return PTR_ERR(trans);
12019
12020         btrfs_init_path(&path);
12021         path.lowest_level = btrfs_header_level(eb);
12022         if (path.lowest_level)
12023                 btrfs_node_key_to_cpu(eb, &key, 0);
12024         else
12025                 btrfs_item_key_to_cpu(eb, &key, 0);
12026
12027         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12028         btrfs_commit_transaction(trans, root);
12029         btrfs_release_path(&path);
12030         return ret;
12031 }
12032
12033 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12034 {
12035         struct btrfs_path path;
12036         struct btrfs_trans_handle *trans;
12037         struct btrfs_key key;
12038         int ret;
12039
12040         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12041                bad->key.type, bad->key.offset);
12042         key.objectid = bad->root_id;
12043         key.type = BTRFS_ROOT_ITEM_KEY;
12044         key.offset = (u64)-1;
12045
12046         root = btrfs_read_fs_root(root->fs_info, &key);
12047         if (IS_ERR(root)) {
12048                 fprintf(stderr, "Couldn't find owner root %llu\n",
12049                         key.objectid);
12050                 return PTR_ERR(root);
12051         }
12052
12053         trans = btrfs_start_transaction(root, 1);
12054         if (IS_ERR(trans))
12055                 return PTR_ERR(trans);
12056
12057         btrfs_init_path(&path);
12058         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12059         if (ret) {
12060                 if (ret > 0)
12061                         ret = 0;
12062                 goto out;
12063         }
12064         ret = btrfs_del_item(trans, root, &path);
12065 out:
12066         btrfs_commit_transaction(trans, root);
12067         btrfs_release_path(&path);
12068         return ret;
12069 }
12070
12071 static int zero_log_tree(struct btrfs_root *root)
12072 {
12073         struct btrfs_trans_handle *trans;
12074         int ret;
12075
12076         trans = btrfs_start_transaction(root, 1);
12077         if (IS_ERR(trans)) {
12078                 ret = PTR_ERR(trans);
12079                 return ret;
12080         }
12081         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12082         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12083         ret = btrfs_commit_transaction(trans, root);
12084         return ret;
12085 }
12086
12087 static int populate_csum(struct btrfs_trans_handle *trans,
12088                          struct btrfs_root *csum_root, char *buf, u64 start,
12089                          u64 len)
12090 {
12091         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12092         u64 offset = 0;
12093         u64 sectorsize;
12094         int ret = 0;
12095
12096         while (offset < len) {
12097                 sectorsize = fs_info->sectorsize;
12098                 ret = read_extent_data(fs_info, buf, start + offset,
12099                                        &sectorsize, 0);
12100                 if (ret)
12101                         break;
12102                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12103                                             start + offset, buf, sectorsize);
12104                 if (ret)
12105                         break;
12106                 offset += sectorsize;
12107         }
12108         return ret;
12109 }
12110
12111 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12112                                       struct btrfs_root *csum_root,
12113                                       struct btrfs_root *cur_root)
12114 {
12115         struct btrfs_path path;
12116         struct btrfs_key key;
12117         struct extent_buffer *node;
12118         struct btrfs_file_extent_item *fi;
12119         char *buf = NULL;
12120         u64 start = 0;
12121         u64 len = 0;
12122         int slot = 0;
12123         int ret = 0;
12124
12125         buf = malloc(cur_root->fs_info->sectorsize);
12126         if (!buf)
12127                 return -ENOMEM;
12128
12129         btrfs_init_path(&path);
12130         key.objectid = 0;
12131         key.offset = 0;
12132         key.type = 0;
12133         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12134         if (ret < 0)
12135                 goto out;
12136         /* Iterate all regular file extents and fill its csum */
12137         while (1) {
12138                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12139
12140                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12141                         goto next;
12142                 node = path.nodes[0];
12143                 slot = path.slots[0];
12144                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12145                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12146                         goto next;
12147                 start = btrfs_file_extent_disk_bytenr(node, fi);
12148                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12149
12150                 ret = populate_csum(trans, csum_root, buf, start, len);
12151                 if (ret == -EEXIST)
12152                         ret = 0;
12153                 if (ret < 0)
12154                         goto out;
12155 next:
12156                 /*
12157                  * TODO: if next leaf is corrupted, jump to nearest next valid
12158                  * leaf.
12159                  */
12160                 ret = btrfs_next_item(cur_root, &path);
12161                 if (ret < 0)
12162                         goto out;
12163                 if (ret > 0) {
12164                         ret = 0;
12165                         goto out;
12166                 }
12167         }
12168
12169 out:
12170         btrfs_release_path(&path);
12171         free(buf);
12172         return ret;
12173 }
12174
12175 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12176                                   struct btrfs_root *csum_root)
12177 {
12178         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12179         struct btrfs_path path;
12180         struct btrfs_root *tree_root = fs_info->tree_root;
12181         struct btrfs_root *cur_root;
12182         struct extent_buffer *node;
12183         struct btrfs_key key;
12184         int slot = 0;
12185         int ret = 0;
12186
12187         btrfs_init_path(&path);
12188         key.objectid = BTRFS_FS_TREE_OBJECTID;
12189         key.offset = 0;
12190         key.type = BTRFS_ROOT_ITEM_KEY;
12191         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12192         if (ret < 0)
12193                 goto out;
12194         if (ret > 0) {
12195                 ret = -ENOENT;
12196                 goto out;
12197         }
12198
12199         while (1) {
12200                 node = path.nodes[0];
12201                 slot = path.slots[0];
12202                 btrfs_item_key_to_cpu(node, &key, slot);
12203                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12204                         goto out;
12205                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12206                         goto next;
12207                 if (!is_fstree(key.objectid))
12208                         goto next;
12209                 key.offset = (u64)-1;
12210
12211                 cur_root = btrfs_read_fs_root(fs_info, &key);
12212                 if (IS_ERR(cur_root) || !cur_root) {
12213                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12214                                 key.objectid);
12215                         goto out;
12216                 }
12217                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12218                                 cur_root);
12219                 if (ret < 0)
12220                         goto out;
12221 next:
12222                 ret = btrfs_next_item(tree_root, &path);
12223                 if (ret > 0) {
12224                         ret = 0;
12225                         goto out;
12226                 }
12227                 if (ret < 0)
12228                         goto out;
12229         }
12230
12231 out:
12232         btrfs_release_path(&path);
12233         return ret;
12234 }
12235
12236 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12237                                       struct btrfs_root *csum_root)
12238 {
12239         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12240         struct btrfs_path path;
12241         struct btrfs_extent_item *ei;
12242         struct extent_buffer *leaf;
12243         char *buf;
12244         struct btrfs_key key;
12245         int ret;
12246
12247         btrfs_init_path(&path);
12248         key.objectid = 0;
12249         key.type = BTRFS_EXTENT_ITEM_KEY;
12250         key.offset = 0;
12251         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12252         if (ret < 0) {
12253                 btrfs_release_path(&path);
12254                 return ret;
12255         }
12256
12257         buf = malloc(csum_root->fs_info->sectorsize);
12258         if (!buf) {
12259                 btrfs_release_path(&path);
12260                 return -ENOMEM;
12261         }
12262
12263         while (1) {
12264                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12265                         ret = btrfs_next_leaf(extent_root, &path);
12266                         if (ret < 0)
12267                                 break;
12268                         if (ret) {
12269                                 ret = 0;
12270                                 break;
12271                         }
12272                 }
12273                 leaf = path.nodes[0];
12274
12275                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12276                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12277                         path.slots[0]++;
12278                         continue;
12279                 }
12280
12281                 ei = btrfs_item_ptr(leaf, path.slots[0],
12282                                     struct btrfs_extent_item);
12283                 if (!(btrfs_extent_flags(leaf, ei) &
12284                       BTRFS_EXTENT_FLAG_DATA)) {
12285                         path.slots[0]++;
12286                         continue;
12287                 }
12288
12289                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12290                                     key.offset);
12291                 if (ret)
12292                         break;
12293                 path.slots[0]++;
12294         }
12295
12296         btrfs_release_path(&path);
12297         free(buf);
12298         return ret;
12299 }
12300
/*
 * Recalculate the data checksums and insert them into the csum tree.
 *
 * Re-initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source.  When @search_fs_tree is non-zero
 * the fs/subvolume trees are walked instead; otherwise the extent tree is.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12317
12318 static void free_roots_info_cache(void)
12319 {
12320         if (!roots_info_cache)
12321                 return;
12322
12323         while (!cache_tree_empty(roots_info_cache)) {
12324                 struct cache_extent *entry;
12325                 struct root_item_info *rii;
12326
12327                 entry = first_cache_extent(roots_info_cache);
12328                 if (!entry)
12329                         break;
12330                 remove_cache_extent(roots_info_cache, entry);
12331                 rii = container_of(entry, struct root_item_info, cache_extent);
12332                 free(rii);
12333         }
12334
12335         free(roots_info_cache);
12336         roots_info_cache = NULL;
12337 }
12338
12339 static int build_roots_info_cache(struct btrfs_fs_info *info)
12340 {
12341         int ret = 0;
12342         struct btrfs_key key;
12343         struct extent_buffer *leaf;
12344         struct btrfs_path path;
12345
12346         if (!roots_info_cache) {
12347                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12348                 if (!roots_info_cache)
12349                         return -ENOMEM;
12350                 cache_tree_init(roots_info_cache);
12351         }
12352
12353         btrfs_init_path(&path);
12354         key.objectid = 0;
12355         key.type = BTRFS_EXTENT_ITEM_KEY;
12356         key.offset = 0;
12357         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12358         if (ret < 0)
12359                 goto out;
12360         leaf = path.nodes[0];
12361
12362         while (1) {
12363                 struct btrfs_key found_key;
12364                 struct btrfs_extent_item *ei;
12365                 struct btrfs_extent_inline_ref *iref;
12366                 int slot = path.slots[0];
12367                 int type;
12368                 u64 flags;
12369                 u64 root_id;
12370                 u8 level;
12371                 struct cache_extent *entry;
12372                 struct root_item_info *rii;
12373
12374                 if (slot >= btrfs_header_nritems(leaf)) {
12375                         ret = btrfs_next_leaf(info->extent_root, &path);
12376                         if (ret < 0) {
12377                                 break;
12378                         } else if (ret) {
12379                                 ret = 0;
12380                                 break;
12381                         }
12382                         leaf = path.nodes[0];
12383                         slot = path.slots[0];
12384                 }
12385
12386                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12387
12388                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12389                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12390                         goto next;
12391
12392                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12393                 flags = btrfs_extent_flags(leaf, ei);
12394
12395                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12396                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12397                         goto next;
12398
12399                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12400                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12401                         level = found_key.offset;
12402                 } else {
12403                         struct btrfs_tree_block_info *binfo;
12404
12405                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12406                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12407                         level = btrfs_tree_block_level(leaf, binfo);
12408                 }
12409
12410                 /*
12411                  * For a root extent, it must be of the following type and the
12412                  * first (and only one) iref in the item.
12413                  */
12414                 type = btrfs_extent_inline_ref_type(leaf, iref);
12415                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12416                         goto next;
12417
12418                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12419                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12420                 if (!entry) {
12421                         rii = malloc(sizeof(struct root_item_info));
12422                         if (!rii) {
12423                                 ret = -ENOMEM;
12424                                 goto out;
12425                         }
12426                         rii->cache_extent.start = root_id;
12427                         rii->cache_extent.size = 1;
12428                         rii->level = (u8)-1;
12429                         entry = &rii->cache_extent;
12430                         ret = insert_cache_extent(roots_info_cache, entry);
12431                         ASSERT(ret == 0);
12432                 } else {
12433                         rii = container_of(entry, struct root_item_info,
12434                                            cache_extent);
12435                 }
12436
12437                 ASSERT(rii->cache_extent.start == root_id);
12438                 ASSERT(rii->cache_extent.size == 1);
12439
12440                 if (level > rii->level || rii->level == (u8)-1) {
12441                         rii->level = level;
12442                         rii->bytenr = found_key.objectid;
12443                         rii->gen = btrfs_extent_generation(leaf, ei);
12444                         rii->node_count = 1;
12445                 } else if (level == rii->level) {
12446                         rii->node_count++;
12447                 }
12448 next:
12449                 path.slots[0]++;
12450         }
12451
12452 out:
12453         btrfs_release_path(&path);
12454
12455         return ret;
12456 }
12457
12458 static int maybe_repair_root_item(struct btrfs_path *path,
12459                                   const struct btrfs_key *root_key,
12460                                   const int read_only_mode)
12461 {
12462         const u64 root_id = root_key->objectid;
12463         struct cache_extent *entry;
12464         struct root_item_info *rii;
12465         struct btrfs_root_item ri;
12466         unsigned long offset;
12467
12468         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12469         if (!entry) {
12470                 fprintf(stderr,
12471                         "Error: could not find extent items for root %llu\n",
12472                         root_key->objectid);
12473                 return -ENOENT;
12474         }
12475
12476         rii = container_of(entry, struct root_item_info, cache_extent);
12477         ASSERT(rii->cache_extent.start == root_id);
12478         ASSERT(rii->cache_extent.size == 1);
12479
12480         if (rii->node_count != 1) {
12481                 fprintf(stderr,
12482                         "Error: could not find btree root extent for root %llu\n",
12483                         root_id);
12484                 return -ENOENT;
12485         }
12486
12487         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12488         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12489
12490         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12491             btrfs_root_level(&ri) != rii->level ||
12492             btrfs_root_generation(&ri) != rii->gen) {
12493
12494                 /*
12495                  * If we're in repair mode but our caller told us to not update
12496                  * the root item, i.e. just check if it needs to be updated, don't
12497                  * print this message, since the caller will call us again shortly
12498                  * for the same root item without read only mode (the caller will
12499                  * open a transaction first).
12500                  */
12501                 if (!(read_only_mode && repair))
12502                         fprintf(stderr,
12503                                 "%sroot item for root %llu,"
12504                                 " current bytenr %llu, current gen %llu, current level %u,"
12505                                 " new bytenr %llu, new gen %llu, new level %u\n",
12506                                 (read_only_mode ? "" : "fixing "),
12507                                 root_id,
12508                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12509                                 btrfs_root_level(&ri),
12510                                 rii->bytenr, rii->gen, rii->level);
12511
12512                 if (btrfs_root_generation(&ri) > rii->gen) {
12513                         fprintf(stderr,
12514                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12515                                 root_id, btrfs_root_generation(&ri), rii->gen);
12516                         return -EINVAL;
12517                 }
12518
12519                 if (!read_only_mode) {
12520                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12521                         btrfs_set_root_level(&ri, rii->level);
12522                         btrfs_set_root_generation(&ri, rii->gen);
12523                         write_extent_buffer(path->nodes[0], &ri,
12524                                             offset, sizeof(ri));
12525                 }
12526
12527                 return 1;
12528         }
12529
12530         return 0;
12531 }
12532
12533 /*
12534  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12535  * caused read-only snapshots to be corrupted if they were created at a moment
12536  * when the source subvolume/snapshot had orphan items. The issue was that the
12537  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12538  * node instead of the post orphan cleanup root node.
12539  * So this function, and its callees, just detects and fixes those cases. Even
12540  * though the regression was for read-only snapshots, this function applies to
12541  * any snapshot/subvolume root.
12542  * This must be run before any other repair code - not doing it so, makes other
12543  * repair code delete or modify backrefs in the extent tree for example, which
12544  * will result in an inconsistent fs after repairing the root items.
12545  */
12546 static int repair_root_items(struct btrfs_fs_info *info)
12547 {
12548         struct btrfs_path path;
12549         struct btrfs_key key;
12550         struct extent_buffer *leaf;
12551         struct btrfs_trans_handle *trans = NULL;
12552         int ret = 0;
12553         int bad_roots = 0;
12554         int need_trans = 0;
12555
12556         btrfs_init_path(&path);
12557
12558         ret = build_roots_info_cache(info);
12559         if (ret)
12560                 goto out;
12561
12562         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12563         key.type = BTRFS_ROOT_ITEM_KEY;
12564         key.offset = 0;
12565
12566 again:
12567         /*
12568          * Avoid opening and committing transactions if a leaf doesn't have
12569          * any root items that need to be fixed, so that we avoid rotating
12570          * backup roots unnecessarily.
12571          */
12572         if (need_trans) {
12573                 trans = btrfs_start_transaction(info->tree_root, 1);
12574                 if (IS_ERR(trans)) {
12575                         ret = PTR_ERR(trans);
12576                         goto out;
12577                 }
12578         }
12579
12580         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12581                                 0, trans ? 1 : 0);
12582         if (ret < 0)
12583                 goto out;
12584         leaf = path.nodes[0];
12585
12586         while (1) {
12587                 struct btrfs_key found_key;
12588
12589                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12590                         int no_more_keys = find_next_key(&path, &key);
12591
12592                         btrfs_release_path(&path);
12593                         if (trans) {
12594                                 ret = btrfs_commit_transaction(trans,
12595                                                                info->tree_root);
12596                                 trans = NULL;
12597                                 if (ret < 0)
12598                                         goto out;
12599                         }
12600                         need_trans = 0;
12601                         if (no_more_keys)
12602                                 break;
12603                         goto again;
12604                 }
12605
12606                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12607
12608                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12609                         goto next;
12610                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12611                         goto next;
12612
12613                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12614                 if (ret < 0)
12615                         goto out;
12616                 if (ret) {
12617                         if (!trans && repair) {
12618                                 need_trans = 1;
12619                                 key = found_key;
12620                                 btrfs_release_path(&path);
12621                                 goto again;
12622                         }
12623                         bad_roots++;
12624                 }
12625 next:
12626                 path.slots[0]++;
12627         }
12628         ret = 0;
12629 out:
12630         free_roots_info_cache();
12631         btrfs_release_path(&path);
12632         if (trans)
12633                 btrfs_commit_transaction(trans, info->tree_root);
12634         if (ret < 0)
12635                 return ret;
12636
12637         return bad_roots;
12638 }
12639
12640 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12641 {
12642         struct btrfs_trans_handle *trans;
12643         struct btrfs_block_group_cache *bg_cache;
12644         u64 current = 0;
12645         int ret = 0;
12646
12647         /* Clear all free space cache inodes and its extent data */
12648         while (1) {
12649                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12650                 if (!bg_cache)
12651                         break;
12652                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12653                 if (ret < 0)
12654                         return ret;
12655                 current = bg_cache->key.objectid + bg_cache->key.offset;
12656         }
12657
12658         /* Don't forget to set cache_generation to -1 */
12659         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12660         if (IS_ERR(trans)) {
12661                 error("failed to update super block cache generation");
12662                 return PTR_ERR(trans);
12663         }
12664         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12665         btrfs_commit_transaction(trans, fs_info->tree_root);
12666
12667         return ret;
12668 }
12669
12670 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12671                 int clear_version)
12672 {
12673         int ret = 0;
12674
12675         if (clear_version == 1) {
12676                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12677                         error(
12678                 "free space cache v2 detected, use --clear-space-cache v2");
12679                         ret = 1;
12680                         goto close_out;
12681                 }
12682                 printf("Clearing free space cache\n");
12683                 ret = clear_free_space_cache(fs_info);
12684                 if (ret) {
12685                         error("failed to clear free space cache");
12686                         ret = 1;
12687                 } else {
12688                         printf("Free space cache cleared\n");
12689                 }
12690         } else if (clear_version == 2) {
12691                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12692                         printf("no free space cache v2 to clear\n");
12693                         ret = 0;
12694                         goto close_out;
12695                 }
12696                 printf("Clear free space cache v2\n");
12697                 ret = btrfs_clear_free_space_tree(fs_info);
12698                 if (ret) {
12699                         error("failed to clear free space cache v2: %d", ret);
12700                         ret = 1;
12701                 } else {
12702                         printf("free space cache v2 cleared\n");
12703                 }
12704         }
12705 close_out:
12706         return ret;
12707 }
12708
/*
 * Help text for "btrfs check", as a NULL-terminated array of lines: the
 * synopsis, a one-line summary, the long description, an empty separator
 * line and then the option list.
 */
const char * const cmd_check_usage[] = {
        /* Synopsis and one-line summary */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* Long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",

        /* Terminator */
        NULL
};
12739
12740 int cmd_check(int argc, char **argv)
12741 {
12742         struct cache_tree root_cache;
12743         struct btrfs_root *root;
12744         struct btrfs_fs_info *info;
12745         u64 bytenr = 0;
12746         u64 subvolid = 0;
12747         u64 tree_root_bytenr = 0;
12748         u64 chunk_root_bytenr = 0;
12749         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12750         int ret;
12751         int err = 0;
12752         u64 num;
12753         int init_csum_tree = 0;
12754         int readonly = 0;
12755         int clear_space_cache = 0;
12756         int qgroup_report = 0;
12757         int qgroups_repaired = 0;
12758         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12759
12760         while(1) {
12761                 int c;
12762                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12763                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12764                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12765                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12766                 static const struct option long_options[] = {
12767                         { "super", required_argument, NULL, 's' },
12768                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12769                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12770                         { "init-csum-tree", no_argument, NULL,
12771                                 GETOPT_VAL_INIT_CSUM },
12772                         { "init-extent-tree", no_argument, NULL,
12773                                 GETOPT_VAL_INIT_EXTENT },
12774                         { "check-data-csum", no_argument, NULL,
12775                                 GETOPT_VAL_CHECK_CSUM },
12776                         { "backup", no_argument, NULL, 'b' },
12777                         { "subvol-extents", required_argument, NULL, 'E' },
12778                         { "qgroup-report", no_argument, NULL, 'Q' },
12779                         { "tree-root", required_argument, NULL, 'r' },
12780                         { "chunk-root", required_argument, NULL,
12781                                 GETOPT_VAL_CHUNK_TREE },
12782                         { "progress", no_argument, NULL, 'p' },
12783                         { "mode", required_argument, NULL,
12784                                 GETOPT_VAL_MODE },
12785                         { "clear-space-cache", required_argument, NULL,
12786                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12787                         { NULL, 0, NULL, 0}
12788                 };
12789
12790                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12791                 if (c < 0)
12792                         break;
12793                 switch(c) {
12794                         case 'a': /* ignored */ break;
12795                         case 'b':
12796                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12797                                 break;
12798                         case 's':
12799                                 num = arg_strtou64(optarg);
12800                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12801                                         error(
12802                                         "super mirror should be less than %d",
12803                                                 BTRFS_SUPER_MIRROR_MAX);
12804                                         exit(1);
12805                                 }
12806                                 bytenr = btrfs_sb_offset(((int)num));
12807                                 printf("using SB copy %llu, bytenr %llu\n", num,
12808                                        (unsigned long long)bytenr);
12809                                 break;
12810                         case 'Q':
12811                                 qgroup_report = 1;
12812                                 break;
12813                         case 'E':
12814                                 subvolid = arg_strtou64(optarg);
12815                                 break;
12816                         case 'r':
12817                                 tree_root_bytenr = arg_strtou64(optarg);
12818                                 break;
12819                         case GETOPT_VAL_CHUNK_TREE:
12820                                 chunk_root_bytenr = arg_strtou64(optarg);
12821                                 break;
12822                         case 'p':
12823                                 ctx.progress_enabled = true;
12824                                 break;
12825                         case '?':
12826                         case 'h':
12827                                 usage(cmd_check_usage);
12828                         case GETOPT_VAL_REPAIR:
12829                                 printf("enabling repair mode\n");
12830                                 repair = 1;
12831                                 ctree_flags |= OPEN_CTREE_WRITES;
12832                                 break;
12833                         case GETOPT_VAL_READONLY:
12834                                 readonly = 1;
12835                                 break;
12836                         case GETOPT_VAL_INIT_CSUM:
12837                                 printf("Creating a new CRC tree\n");
12838                                 init_csum_tree = 1;
12839                                 repair = 1;
12840                                 ctree_flags |= OPEN_CTREE_WRITES;
12841                                 break;
12842                         case GETOPT_VAL_INIT_EXTENT:
12843                                 init_extent_tree = 1;
12844                                 ctree_flags |= (OPEN_CTREE_WRITES |
12845                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12846                                 repair = 1;
12847                                 break;
12848                         case GETOPT_VAL_CHECK_CSUM:
12849                                 check_data_csum = 1;
12850                                 break;
12851                         case GETOPT_VAL_MODE:
12852                                 check_mode = parse_check_mode(optarg);
12853                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12854                                         error("unknown mode: %s", optarg);
12855                                         exit(1);
12856                                 }
12857                                 break;
12858                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12859                                 if (strcmp(optarg, "v1") == 0) {
12860                                         clear_space_cache = 1;
12861                                 } else if (strcmp(optarg, "v2") == 0) {
12862                                         clear_space_cache = 2;
12863                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12864                                 } else {
12865                                         error(
12866                 "invalid argument to --clear-space-cache, must be v1 or v2");
12867                                         exit(1);
12868                                 }
12869                                 ctree_flags |= OPEN_CTREE_WRITES;
12870                                 break;
12871                 }
12872         }
12873
12874         if (check_argc_exact(argc - optind, 1))
12875                 usage(cmd_check_usage);
12876
12877         if (ctx.progress_enabled) {
12878                 ctx.tp = TASK_NOTHING;
12879                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12880         }
12881
12882         /* This check is the only reason for --readonly to exist */
12883         if (readonly && repair) {
12884                 error("repair options are not compatible with --readonly");
12885                 exit(1);
12886         }
12887
12888         /*
12889          * Not supported yet
12890          */
12891         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12892                 error("low memory mode doesn't support repair yet");
12893                 exit(1);
12894         }
12895
12896         radix_tree_init();
12897         cache_tree_init(&root_cache);
12898
12899         if((ret = check_mounted(argv[optind])) < 0) {
12900                 error("could not check mount status: %s", strerror(-ret));
12901                 err |= !!ret;
12902                 goto err_out;
12903         } else if(ret) {
12904                 error("%s is currently mounted, aborting", argv[optind]);
12905                 ret = -EBUSY;
12906                 err |= !!ret;
12907                 goto err_out;
12908         }
12909
12910         /* only allow partial opening under repair mode */
12911         if (repair)
12912                 ctree_flags |= OPEN_CTREE_PARTIAL;
12913
12914         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12915                                   chunk_root_bytenr, ctree_flags);
12916         if (!info) {
12917                 error("cannot open file system");
12918                 ret = -EIO;
12919                 err |= !!ret;
12920                 goto err_out;
12921         }
12922
12923         global_info = info;
12924         root = info->fs_root;
12925         uuid_unparse(info->super_copy->fsid, uuidbuf);
12926
12927         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12928
12929         /*
12930          * Check the bare minimum before starting anything else that could rely
12931          * on it, namely the tree roots, any local consistency checks
12932          */
12933         if (!extent_buffer_uptodate(info->tree_root->node) ||
12934             !extent_buffer_uptodate(info->dev_root->node) ||
12935             !extent_buffer_uptodate(info->chunk_root->node)) {
12936                 error("critical roots corrupted, unable to check the filesystem");
12937                 err |= !!ret;
12938                 ret = -EIO;
12939                 goto close_out;
12940         }
12941
12942         if (clear_space_cache) {
12943                 ret = do_clear_free_space_cache(info, clear_space_cache);
12944                 err |= !!ret;
12945                 goto close_out;
12946         }
12947
12948         /*
12949          * repair mode will force us to commit transaction which
12950          * will make us fail to load log tree when mounting.
12951          */
12952         if (repair && btrfs_super_log_root(info->super_copy)) {
12953                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12954                 if (!ret) {
12955                         ret = 1;
12956                         err |= !!ret;
12957                         goto close_out;
12958                 }
12959                 ret = zero_log_tree(root);
12960                 err |= !!ret;
12961                 if (ret) {
12962                         error("failed to zero log tree: %d", ret);
12963                         goto close_out;
12964                 }
12965         }
12966
12967         if (qgroup_report) {
12968                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12969                        uuidbuf);
12970                 ret = qgroup_verify_all(info);
12971                 err |= !!ret;
12972                 if (ret == 0)
12973                         report_qgroups(1);
12974                 goto close_out;
12975         }
12976         if (subvolid) {
12977                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12978                        subvolid, argv[optind], uuidbuf);
12979                 ret = print_extent_state(info, subvolid);
12980                 err |= !!ret;
12981                 goto close_out;
12982         }
12983
12984         if (init_extent_tree || init_csum_tree) {
12985                 struct btrfs_trans_handle *trans;
12986
12987                 trans = btrfs_start_transaction(info->extent_root, 0);
12988                 if (IS_ERR(trans)) {
12989                         error("error starting transaction");
12990                         ret = PTR_ERR(trans);
12991                         err |= !!ret;
12992                         goto close_out;
12993                 }
12994
12995                 if (init_extent_tree) {
12996                         printf("Creating a new extent tree\n");
12997                         ret = reinit_extent_tree(trans, info);
12998                         err |= !!ret;
12999                         if (ret)
13000                                 goto close_out;
13001                 }
13002
13003                 if (init_csum_tree) {
13004                         printf("Reinitialize checksum tree\n");
13005                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13006                         if (ret) {
13007                                 error("checksum tree initialization failed: %d",
13008                                                 ret);
13009                                 ret = -EIO;
13010                                 err |= !!ret;
13011                                 goto close_out;
13012                         }
13013
13014                         ret = fill_csum_tree(trans, info->csum_root,
13015                                              init_extent_tree);
13016                         err |= !!ret;
13017                         if (ret) {
13018                                 error("checksum tree refilling failed: %d", ret);
13019                                 return -EIO;
13020                         }
13021                 }
13022                 /*
13023                  * Ok now we commit and run the normal fsck, which will add
13024                  * extent entries for all of the items it finds.
13025                  */
13026                 ret = btrfs_commit_transaction(trans, info->extent_root);
13027                 err |= !!ret;
13028                 if (ret)
13029                         goto close_out;
13030         }
13031         if (!extent_buffer_uptodate(info->extent_root->node)) {
13032                 error("critical: extent_root, unable to check the filesystem");
13033                 ret = -EIO;
13034                 err |= !!ret;
13035                 goto close_out;
13036         }
13037         if (!extent_buffer_uptodate(info->csum_root->node)) {
13038                 error("critical: csum_root, unable to check the filesystem");
13039                 ret = -EIO;
13040                 err |= !!ret;
13041                 goto close_out;
13042         }
13043
13044         ret = do_check_chunks_and_extents(info);
13045         err |= !!ret;
13046         if (ret)
13047                 error(
13048                 "errors found in extent allocation tree or chunk allocation");
13049
13050         ret = repair_root_items(info);
13051         err |= !!ret;
13052         if (ret < 0) {
13053                 error("failed to repair root items: %s", strerror(-ret));
13054                 goto close_out;
13055         }
13056         if (repair) {
13057                 fprintf(stderr, "Fixed %d roots.\n", ret);
13058                 ret = 0;
13059         } else if (ret > 0) {
13060                 fprintf(stderr,
13061                        "Found %d roots with an outdated root item.\n",
13062                        ret);
13063                 fprintf(stderr,
13064                         "Please run a filesystem check with the option --repair to fix them.\n");
13065                 ret = 1;
13066                 err |= !!ret;
13067                 goto close_out;
13068         }
13069
13070         if (!ctx.progress_enabled) {
13071                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13072                         fprintf(stderr, "checking free space tree\n");
13073                 else
13074                         fprintf(stderr, "checking free space cache\n");
13075         }
13076         ret = check_space_cache(root);
13077         err |= !!ret;
13078         if (ret) {
13079                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13080                         error("errors found in free space tree");
13081                 else
13082                         error("errors found in free space cache");
13083                 goto out;
13084         }
13085
13086         /*
13087          * We used to have to have these hole extents in between our real
13088          * extents so if we don't have this flag set we need to make sure there
13089          * are no gaps in the file extents for inodes, otherwise we can just
13090          * ignore it when this happens.
13091          */
13092         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13093         if (!ctx.progress_enabled)
13094                 fprintf(stderr, "checking fs roots\n");
13095         if (check_mode == CHECK_MODE_LOWMEM)
13096                 ret = check_fs_roots_v2(root->fs_info);
13097         else
13098                 ret = check_fs_roots(info, &root_cache);
13099         err |= !!ret;
13100         if (ret) {
13101                 error("errors found in fs roots");
13102                 goto out;
13103         }
13104
13105         fprintf(stderr, "checking csums\n");
13106         ret = check_csums(root);
13107         err |= !!ret;
13108         if (ret) {
13109                 error("errors found in csum tree");
13110                 goto out;
13111         }
13112
13113         fprintf(stderr, "checking root refs\n");
13114         /* For low memory mode, check_fs_roots_v2 handles root refs */
13115         if (check_mode != CHECK_MODE_LOWMEM) {
13116                 ret = check_root_refs(root, &root_cache);
13117                 err |= !!ret;
13118                 if (ret) {
13119                         error("errors found in root refs");
13120                         goto out;
13121                 }
13122         }
13123
13124         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13125                 struct extent_buffer *eb;
13126
13127                 eb = list_first_entry(&root->fs_info->recow_ebs,
13128                                       struct extent_buffer, recow);
13129                 list_del_init(&eb->recow);
13130                 ret = recow_extent_buffer(root, eb);
13131                 err |= !!ret;
13132                 if (ret) {
13133                         error("fails to fix transid errors");
13134                         break;
13135                 }
13136         }
13137
13138         while (!list_empty(&delete_items)) {
13139                 struct bad_item *bad;
13140
13141                 bad = list_first_entry(&delete_items, struct bad_item, list);
13142                 list_del_init(&bad->list);
13143                 if (repair) {
13144                         ret = delete_bad_item(root, bad);
13145                         err |= !!ret;
13146                 }
13147                 free(bad);
13148         }
13149
13150         if (info->quota_enabled) {
13151                 fprintf(stderr, "checking quota groups\n");
13152                 ret = qgroup_verify_all(info);
13153                 err |= !!ret;
13154                 if (ret) {
13155                         error("failed to check quota groups");
13156                         goto out;
13157                 }
13158                 report_qgroups(0);
13159                 ret = repair_qgroups(info, &qgroups_repaired);
13160                 err |= !!ret;
13161                 if (err) {
13162                         error("failed to repair quota groups");
13163                         goto out;
13164                 }
13165                 ret = 0;
13166         }
13167
13168         if (!list_empty(&root->fs_info->recow_ebs)) {
13169                 error("transid errors in file system");
13170                 ret = 1;
13171                 err |= !!ret;
13172         }
13173 out:
13174         printf("found %llu bytes used, ",
13175                (unsigned long long)bytes_used);
13176         if (err)
13177                 printf("error(s) found\n");
13178         else
13179                 printf("no error found\n");
13180         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13181         printf("total tree bytes: %llu\n",
13182                (unsigned long long)total_btree_bytes);
13183         printf("total fs tree bytes: %llu\n",
13184                (unsigned long long)total_fs_tree_bytes);
13185         printf("total extent tree bytes: %llu\n",
13186                (unsigned long long)total_extent_tree_bytes);
13187         printf("btree space waste bytes: %llu\n",
13188                (unsigned long long)btree_space_waste);
13189         printf("file data blocks allocated: %llu\n referenced %llu\n",
13190                 (unsigned long long)data_bytes_allocated,
13191                 (unsigned long long)data_bytes_referenced);
13192
13193         free_qgroup_counts();
13194         free_root_recs_tree(&root_cache);
13195 close_out:
13196         close_ctree(root);
13197 err_out:
13198         if (ctx.progress_enabled)
13199                 task_deinit(ctx.info);
13200
13201         return err;
13202 }