btrfs-progs: check: move more of space cache clearing to a helper
[platform/upstream/btrfs-progs.git] / cmds-check.c
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases of the check that the progress indicator can report.
 *
 * The value is used as an index into task_position_string[] in
 * print_status_check(), so every phase listed before TASK_NOTHING needs a
 * label there.  TASK_NOTHING itself has no label and makes
 * print_status_check() return without printing.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* must be the last element */
};
53
/* Context handed to the progress-indicator callbacks (see print_status_check()). */
struct task_ctx {
        /* presumably non-zero when progress output was requested -- TODO confirm against callers */
        int progress_enabled;
        /* phase whose label print_status_check() prints */
        enum task_position tp;

        /* handle passed to task_period_start()/task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state of the checker.
 *
 * NOTE(review): none of these variables is written or read in this part of
 * the file.  Judging by the names, the u64 values are statistics collected
 * during the scan and the int values are option/feature flags -- confirm
 * against the code that uses them.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress-indicator context; zero-initialised. */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Implementation used to run the check.
 *
 * parse_check_mode() maps "lowmem" to CHECK_MODE_LOWMEM, "orig" and
 * "original" to CHECK_MODE_ORIGINAL and anything else to
 * CHECK_MODE_UNKNOWN.  CHECK_MODE_DEFAULT is an alias of the original mode.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.
 *
 * It is embedded as the member "node" of struct data_backref and
 * struct tree_backref; to_data_backref()/to_tree_backref() convert a
 * pointer to this header back to the containing structure.
 */
struct extent_backref {
        /* list linkage (see to_extent_backref()); presumably on extent_record::backrefs -- TODO confirm */
        struct list_head list;
        /* presumably tells a data_backref from a tree_backref -- TODO confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
/* Back reference for a data extent; "node" is the common header. */
struct data_backref {
        struct extent_backref node;
        /*
         * NOTE(review): parent and root share storage; which one is
         * meaningful presumably depends on node.full_backref -- confirm.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* NOTE: a counter, unlike the one-bit node.found_ref flag */
        u32 found_ref;
};
115
/*
 * Error bits for individual items; the meaning of each bit is given on its
 * line.  The bits start at 1 << 1, bit 0 is not assigned here.
 *
 * NOTE(review): the code that sets and tests these bits is outside this
 * part of the file -- presumably the low memory mode checks; confirm.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* printed as "inode" by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
153
/* Back reference for a tree block; "node" is the common header. */
struct tree_backref {
        struct extent_backref node;
        /*
         * NOTE(review): parent and root share storage; which one is
         * meaningful presumably depends on node.full_backref -- confirm.
         */
        union {
                u64 parent;
                u64 root;
        };
};
161
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
163 {
164         return container_of(back, struct tree_backref, node);
165 }
166
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a two-bit field, so the value 2 fits next to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
169
/*
 * Record describing one extent.
 *
 * NOTE(review): the members are not filled in or read in this part of the
 * file; the remarks below are limited to what the declarations themselves
 * show.
 */
struct extent_record {
        /* presumably the list of struct extent_backref entries -- TODO confirm */
        struct list_head backrefs;
        struct list_head dups;
        /* list linkage used by to_extent_record() */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
196
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
198 {
199         return container_of(entry, struct extent_record, list);
200 }
201
202 struct inode_backref {
203         struct list_head list;
204         unsigned int found_dir_item:1;
205         unsigned int found_dir_index:1;
206         unsigned int found_inode_ref:1;
207         u8 filetype;
208         u8 ref_type;
209         int errors;
210         u64 dir;
211         u64 index;
212         u16 namelen;
213         char name[0];
214 };
215
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
217 {
218         return list_entry(entry, struct inode_backref, list);
219 }
220
/*
 * Values describing one root item.
 *
 * NOTE(review): presumably a copy of the interesting btrfs_root_item
 * members (the names match); the code filling it in is outside this part
 * of the file -- confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        struct btrfs_key drop_key;
};
230
/*
 * Error bits of a back reference.  print_ref_error() translates each bit
 * into a short message.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
244
/*
 * One hole of a file, covering the byte range [start, start + len)
 * (see compare_hole_range()).  Holes are kept in an rb-tree that
 * compare_hole() orders by start offset.
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
250
/*
 * Everything collected about one inode.
 *
 * Records are shared: get_inode_rec() clones a record whose refs is
 * greater than one before handing it out for modification, and
 * free_inode_rec() only releases the memory when refs drops to zero.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* get_inode_rec() presets this to 1 for BTRFS_FREE_INO_OBJECTID */
        u32 found_link;
        u64 found_size;
        /* get_inode_rec() initialises this to (u64)-1 */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* number of users sharing this record */
        u32 refs;
};
278
/*
 * Error bits stored in inode_record::errors.  print_inode_error()
 * translates each bit into a short message.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
294
295 struct root_backref {
296         struct list_head list;
297         unsigned int found_dir_item:1;
298         unsigned int found_dir_index:1;
299         unsigned int found_back_ref:1;
300         unsigned int found_forward_ref:1;
301         unsigned int reachable:1;
302         int errors;
303         u64 ref_root;
304         u64 dir;
305         u64 index;
306         u16 namelen;
307         char name[0];
308 };
309
310 static inline struct root_backref* to_root_backref(struct list_head *entry)
311 {
312         return list_entry(entry, struct root_backref, list);
313 }
314
/*
 * Everything collected about one root.
 * NOTE(review): not used in this part of the file; member remarks are
 * assumptions to be confirmed.
 */
struct root_record {
        /* presumably a list of struct root_backref -- TODO confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
322
/*
 * Cache tree entry carrying an arbitrary pointer.
 *
 * get_inode_rec() uses it to map an inode number (cache.start = ino,
 * cache.size = 1) to its struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
327
/*
 * State kept for one shared tree node.
 * NOTE(review): not used in this part of the file; presumably entries of
 * walk_control::shared -- confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        /* presumably holds ptr_node entries as handled by get_inode_rec() -- TODO confirm */
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
335
/*
 * Location of one block.
 * NOTE(review): presumably start is a byte offset and size a length in
 * bytes -- not visible in this part of the file, confirm.
 */
struct block_info {
        u64 start;
        u32 size;
};
340
/*
 * State of a tree walk.
 * NOTE(review): not used in this part of the file; member remarks are
 * assumptions to be confirmed.
 */
struct walk_control {
        struct cache_tree shared;
        /* one slot per possible tree level */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        /* presumably an index into nodes[] -- TODO confirm */
        int active_node;
        int root_level;
};
347
/*
 * A tree item identified by root and key.
 * NOTE(review): presumably queued on the delete_items list -- not visible
 * in this part of the file, confirm.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
353
/*
 * One candidate (bytenr, bytes) description of an extent.
 * NOTE(review): count and broken are not touched in this part of the
 * file; presumably a vote counter and a "seen on a broken backref" marker
 * -- confirm.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
361
/*
 * Information about the root node of one tree.
 * NOTE(review): presumably the entries of roots_info_cache -- not visible
 * in this part of the file, confirm.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
371
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.
 *
 * Every error has its own bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable; it now uses the next free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
388
389 static void *print_status_check(void *p)
390 {
391         struct task_ctx *priv = p;
392         const char work_indicator[] = { '.', 'o', 'O', 'o' };
393         uint32_t count = 0;
394         static char *task_position_string[] = {
395                 "checking extents",
396                 "checking free space cache",
397                 "checking fs roots",
398         };
399
400         task_period_start(priv->info, 1000 /* 1s */);
401
402         if (priv->tp == TASK_NOTHING)
403                 return NULL;
404
405         while (1) {
406                 printf("%s [%c]\r", task_position_string[priv->tp],
407                                 work_indicator[count % 4]);
408                 count++;
409                 fflush(stdout);
410                 task_period_wait(priv->info);
411         }
412         return NULL;
413 }
414
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
422
423 static enum btrfs_check_mode parse_check_mode(const char *str)
424 {
425         if (strcmp(str, "lowmem") == 0)
426                 return CHECK_MODE_LOWMEM;
427         if (strcmp(str, "orig") == 0)
428                 return CHECK_MODE_ORIGINAL;
429         if (strcmp(str, "original") == 0)
430                 return CHECK_MODE_ORIGINAL;
431
432         return CHECK_MODE_UNKNOWN;
433 }
434
435 /* Compatible function to allow reuse of old codes */
436 static u64 first_extent_gap(struct rb_root *holes)
437 {
438         struct file_extent_hole *hole;
439
440         if (RB_EMPTY_ROOT(holes))
441                 return (u64)-1;
442
443         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
444         return hole->start;
445 }
446
447 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
448 {
449         struct file_extent_hole *hole1;
450         struct file_extent_hole *hole2;
451
452         hole1 = rb_entry(node1, struct file_extent_hole, node);
453         hole2 = rb_entry(node2, struct file_extent_hole, node);
454
455         if (hole1->start > hole2->start)
456                 return -1;
457         if (hole1->start < hole2->start)
458                 return 1;
459         /* Now hole1->start == hole2->start */
460         if (hole1->len >= hole2->len)
461                 /*
462                  * Hole 1 will be merge center
463                  * Same hole will be merged later
464                  */
465                 return -1;
466         /* Hole 2 will be merge center */
467         return 1;
468 }
469
470 /*
471  * Add a hole to the record
472  *
473  * This will do hole merge for copy_file_extent_holes(),
474  * which will ensure there won't be continuous holes.
475  */
476 static int add_file_extent_hole(struct rb_root *holes,
477                                 u64 start, u64 len)
478 {
479         struct file_extent_hole *hole;
480         struct file_extent_hole *prev = NULL;
481         struct file_extent_hole *next = NULL;
482
483         hole = malloc(sizeof(*hole));
484         if (!hole)
485                 return -ENOMEM;
486         hole->start = start;
487         hole->len = len;
488         /* Since compare will not return 0, no -EEXIST will happen */
489         rb_insert(holes, &hole->node, compare_hole);
490
491         /* simple merge with previous hole */
492         if (rb_prev(&hole->node))
493                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
494                                 node);
495         if (prev && prev->start + prev->len >= hole->start) {
496                 hole->len = hole->start + hole->len - prev->start;
497                 hole->start = prev->start;
498                 rb_erase(&prev->node, holes);
499                 free(prev);
500                 prev = NULL;
501         }
502
503         /* iterate merge with next holes */
504         while (1) {
505                 if (!rb_next(&hole->node))
506                         break;
507                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
508                                         node);
509                 if (hole->start + hole->len >= next->start) {
510                         if (hole->start + hole->len <= next->start + next->len)
511                                 hole->len = next->start + next->len -
512                                             hole->start;
513                         rb_erase(&next->node, holes);
514                         free(next);
515                         next = NULL;
516                 } else
517                         break;
518         }
519         return 0;
520 }
521
522 static int compare_hole_range(struct rb_node *node, void *data)
523 {
524         struct file_extent_hole *hole;
525         u64 start;
526
527         hole = (struct file_extent_hole *)data;
528         start = hole->start;
529
530         hole = rb_entry(node, struct file_extent_hole, node);
531         if (start < hole->start)
532                 return -1;
533         if (start >= hole->start && start < hole->start + hole->len)
534                 return 0;
535         return 1;
536 }
537
538 /*
539  * Delete a hole in the record
540  *
541  * This will do the hole split and is much restrict than add.
542  */
543 static int del_file_extent_hole(struct rb_root *holes,
544                                 u64 start, u64 len)
545 {
546         struct file_extent_hole *hole;
547         struct file_extent_hole tmp;
548         u64 prev_start = 0;
549         u64 prev_len = 0;
550         u64 next_start = 0;
551         u64 next_len = 0;
552         struct rb_node *node;
553         int have_prev = 0;
554         int have_next = 0;
555         int ret = 0;
556
557         tmp.start = start;
558         tmp.len = len;
559         node = rb_search(holes, &tmp, compare_hole_range, NULL);
560         if (!node)
561                 return -EEXIST;
562         hole = rb_entry(node, struct file_extent_hole, node);
563         if (start + len > hole->start + hole->len)
564                 return -EEXIST;
565
566         /*
567          * Now there will be no overlap, delete the hole and re-add the
568          * split(s) if they exists.
569          */
570         if (start > hole->start) {
571                 prev_start = hole->start;
572                 prev_len = start - hole->start;
573                 have_prev = 1;
574         }
575         if (hole->start + hole->len > start + len) {
576                 next_start = start + len;
577                 next_len = hole->start + hole->len - start - len;
578                 have_next = 1;
579         }
580         rb_erase(node, holes);
581         free(hole);
582         if (have_prev) {
583                 ret = add_file_extent_hole(holes, prev_start, prev_len);
584                 if (ret < 0)
585                         return ret;
586         }
587         if (have_next) {
588                 ret = add_file_extent_hole(holes, next_start, next_len);
589                 if (ret < 0)
590                         return ret;
591         }
592         return 0;
593 }
594
595 static int copy_file_extent_holes(struct rb_root *dst,
596                                   struct rb_root *src)
597 {
598         struct file_extent_hole *hole;
599         struct rb_node *node;
600         int ret = 0;
601
602         node = rb_first(src);
603         while (node) {
604                 hole = rb_entry(node, struct file_extent_hole, node);
605                 ret = add_file_extent_hole(dst, hole->start, hole->len);
606                 if (ret)
607                         break;
608                 node = rb_next(node);
609         }
610         return ret;
611 }
612
613 static void free_file_extent_holes(struct rb_root *holes)
614 {
615         struct rb_node *node;
616         struct file_extent_hole *hole;
617
618         node = rb_first(holes);
619         while (node) {
620                 hole = rb_entry(node, struct file_extent_hole, node);
621                 rb_erase(node, holes);
622                 free(hole);
623                 node = rb_first(holes);
624         }
625 }
626
627 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
628
629 static void record_root_in_trans(struct btrfs_trans_handle *trans,
630                                  struct btrfs_root *root)
631 {
632         if (root->last_trans != trans->transid) {
633                 root->track_dirty = 1;
634                 root->last_trans = trans->transid;
635                 root->commit_root = root->node;
636                 extent_buffer_get(root->node);
637         }
638 }
639
640 static u8 imode_to_type(u32 imode)
641 {
642 #define S_SHIFT 12
643         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
644                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
645                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
646                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
647                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
648                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
649                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
650                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
651         };
652
653         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
654 #undef S_SHIFT
655 }
656
657 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
658 {
659         struct device_record *rec1;
660         struct device_record *rec2;
661
662         rec1 = rb_entry(node1, struct device_record, node);
663         rec2 = rb_entry(node2, struct device_record, node);
664         if (rec1->devid > rec2->devid)
665                 return -1;
666         else if (rec1->devid < rec2->devid)
667                 return 1;
668         else
669                 return 0;
670 }
671
672 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
673 {
674         struct inode_record *rec;
675         struct inode_backref *backref;
676         struct inode_backref *orig;
677         struct inode_backref *tmp;
678         struct orphan_data_extent *src_orphan;
679         struct orphan_data_extent *dst_orphan;
680         struct rb_node *rb;
681         size_t size;
682         int ret;
683
684         rec = malloc(sizeof(*rec));
685         if (!rec)
686                 return ERR_PTR(-ENOMEM);
687         memcpy(rec, orig_rec, sizeof(*rec));
688         rec->refs = 1;
689         INIT_LIST_HEAD(&rec->backrefs);
690         INIT_LIST_HEAD(&rec->orphan_extents);
691         rec->holes = RB_ROOT;
692
693         list_for_each_entry(orig, &orig_rec->backrefs, list) {
694                 size = sizeof(*orig) + orig->namelen + 1;
695                 backref = malloc(size);
696                 if (!backref) {
697                         ret = -ENOMEM;
698                         goto cleanup;
699                 }
700                 memcpy(backref, orig, size);
701                 list_add_tail(&backref->list, &rec->backrefs);
702         }
703         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
704                 dst_orphan = malloc(sizeof(*dst_orphan));
705                 if (!dst_orphan) {
706                         ret = -ENOMEM;
707                         goto cleanup;
708                 }
709                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
710                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
711         }
712         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
713         if (ret < 0)
714                 goto cleanup_rb;
715
716         return rec;
717
718 cleanup_rb:
719         rb = rb_first(&rec->holes);
720         while (rb) {
721                 struct file_extent_hole *hole;
722
723                 hole = rb_entry(rb, struct file_extent_hole, node);
724                 rb = rb_next(rb);
725                 free(hole);
726         }
727
728 cleanup:
729         if (!list_empty(&rec->backrefs))
730                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
731                         list_del(&orig->list);
732                         free(orig);
733                 }
734
735         if (!list_empty(&rec->orphan_extents))
736                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
737                         list_del(&orig->list);
738                         free(orig);
739                 }
740
741         free(rec);
742
743         return ERR_PTR(ret);
744 }
745
746 static void print_orphan_data_extents(struct list_head *orphan_extents,
747                                       u64 objectid)
748 {
749         struct orphan_data_extent *orphan;
750
751         if (list_empty(orphan_extents))
752                 return;
753         printf("The following data extent is lost in tree %llu:\n",
754                objectid);
755         list_for_each_entry(orphan, orphan_extents, list) {
756                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
757                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
758                        orphan->disk_len);
759         }
760 }
761
762 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
763 {
764         u64 root_objectid = root->root_key.objectid;
765         int errors = rec->errors;
766
767         if (!errors)
768                 return;
769         /* reloc root errors, we print its corresponding fs root objectid*/
770         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
771                 root_objectid = root->root_key.offset;
772                 fprintf(stderr, "reloc");
773         }
774         fprintf(stderr, "root %llu inode %llu errors %x",
775                 (unsigned long long) root_objectid,
776                 (unsigned long long) rec->ino, rec->errors);
777
778         if (errors & I_ERR_NO_INODE_ITEM)
779                 fprintf(stderr, ", no inode item");
780         if (errors & I_ERR_NO_ORPHAN_ITEM)
781                 fprintf(stderr, ", no orphan item");
782         if (errors & I_ERR_DUP_INODE_ITEM)
783                 fprintf(stderr, ", dup inode item");
784         if (errors & I_ERR_DUP_DIR_INDEX)
785                 fprintf(stderr, ", dup dir index");
786         if (errors & I_ERR_ODD_DIR_ITEM)
787                 fprintf(stderr, ", odd dir item");
788         if (errors & I_ERR_ODD_FILE_EXTENT)
789                 fprintf(stderr, ", odd file extent");
790         if (errors & I_ERR_BAD_FILE_EXTENT)
791                 fprintf(stderr, ", bad file extent");
792         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
793                 fprintf(stderr, ", file extent overlap");
794         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
795                 fprintf(stderr, ", file extent discount");
796         if (errors & I_ERR_DIR_ISIZE_WRONG)
797                 fprintf(stderr, ", dir isize wrong");
798         if (errors & I_ERR_FILE_NBYTES_WRONG)
799                 fprintf(stderr, ", nbytes wrong");
800         if (errors & I_ERR_ODD_CSUM_ITEM)
801                 fprintf(stderr, ", odd csum item");
802         if (errors & I_ERR_SOME_CSUM_MISSING)
803                 fprintf(stderr, ", some csum missing");
804         if (errors & I_ERR_LINK_COUNT_WRONG)
805                 fprintf(stderr, ", link count wrong");
806         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
807                 fprintf(stderr, ", orphan file extent");
808         fprintf(stderr, "\n");
809         /* Print the orphan extents if needed */
810         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
811                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
812
813         /* Print the holes if needed */
814         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
815                 struct file_extent_hole *hole;
816                 struct rb_node *node;
817                 int found = 0;
818
819                 node = rb_first(&rec->holes);
820                 fprintf(stderr, "Found file extent holes:\n");
821                 while (node) {
822                         found = 1;
823                         hole = rb_entry(node, struct file_extent_hole, node);
824                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
825                                 hole->start, hole->len);
826                         node = rb_next(node);
827                 }
828                 if (!found)
829                         fprintf(stderr, "\tstart: 0, len: %llu\n",
830                                 round_up(rec->isize,
831                                          root->fs_info->sectorsize));
832         }
833 }
834
835 static void print_ref_error(int errors)
836 {
837         if (errors & REF_ERR_NO_DIR_ITEM)
838                 fprintf(stderr, ", no dir item");
839         if (errors & REF_ERR_NO_DIR_INDEX)
840                 fprintf(stderr, ", no dir index");
841         if (errors & REF_ERR_NO_INODE_REF)
842                 fprintf(stderr, ", no inode ref");
843         if (errors & REF_ERR_DUP_DIR_ITEM)
844                 fprintf(stderr, ", dup dir item");
845         if (errors & REF_ERR_DUP_DIR_INDEX)
846                 fprintf(stderr, ", dup dir index");
847         if (errors & REF_ERR_DUP_INODE_REF)
848                 fprintf(stderr, ", dup inode ref");
849         if (errors & REF_ERR_INDEX_UNMATCH)
850                 fprintf(stderr, ", index mismatch");
851         if (errors & REF_ERR_FILETYPE_UNMATCH)
852                 fprintf(stderr, ", filetype mismatch");
853         if (errors & REF_ERR_NAME_TOO_LONG)
854                 fprintf(stderr, ", name too long");
855         if (errors & REF_ERR_NO_ROOT_REF)
856                 fprintf(stderr, ", no root ref");
857         if (errors & REF_ERR_NO_ROOT_BACKREF)
858                 fprintf(stderr, ", no root backref");
859         if (errors & REF_ERR_DUP_ROOT_REF)
860                 fprintf(stderr, ", dup root ref");
861         if (errors & REF_ERR_DUP_ROOT_BACKREF)
862                 fprintf(stderr, ", dup root backref");
863         fprintf(stderr, "\n");
864 }
865
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
867                                           u64 ino, int mod)
868 {
869         struct ptr_node *node;
870         struct cache_extent *cache;
871         struct inode_record *rec = NULL;
872         int ret;
873
874         cache = lookup_cache_extent(inode_cache, ino, 1);
875         if (cache) {
876                 node = container_of(cache, struct ptr_node, cache);
877                 rec = node->data;
878                 if (mod && rec->refs > 1) {
879                         node->data = clone_inode_rec(rec);
880                         if (IS_ERR(node->data))
881                                 return node->data;
882                         rec->refs--;
883                         rec = node->data;
884                 }
885         } else if (mod) {
886                 rec = calloc(1, sizeof(*rec));
887                 if (!rec)
888                         return ERR_PTR(-ENOMEM);
889                 rec->ino = ino;
890                 rec->extent_start = (u64)-1;
891                 rec->refs = 1;
892                 INIT_LIST_HEAD(&rec->backrefs);
893                 INIT_LIST_HEAD(&rec->orphan_extents);
894                 rec->holes = RB_ROOT;
895
896                 node = malloc(sizeof(*node));
897                 if (!node) {
898                         free(rec);
899                         return ERR_PTR(-ENOMEM);
900                 }
901                 node->cache.start = ino;
902                 node->cache.size = 1;
903                 node->data = rec;
904
905                 if (ino == BTRFS_FREE_INO_OBJECTID)
906                         rec->found_link = 1;
907
908                 ret = insert_cache_extent(inode_cache, &node->cache);
909                 if (ret)
910                         return ERR_PTR(-EEXIST);
911         }
912         return rec;
913 }
914
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
916 {
917         struct orphan_data_extent *orphan;
918
919         while (!list_empty(orphan_extents)) {
920                 orphan = list_entry(orphan_extents->next,
921                                     struct orphan_data_extent, list);
922                 list_del(&orphan->list);
923                 free(orphan);
924         }
925 }
926
927 static void free_inode_rec(struct inode_record *rec)
928 {
929         struct inode_backref *backref;
930
931         if (--rec->refs > 0)
932                 return;
933
934         while (!list_empty(&rec->backrefs)) {
935                 backref = to_inode_backref(rec->backrefs.next);
936                 list_del(&backref->list);
937                 free(backref);
938         }
939         free_orphan_data_extents(&rec->orphan_extents);
940         free_file_extent_holes(&rec->holes);
941         free(rec);
942 }
943
944 static int can_free_inode_rec(struct inode_record *rec)
945 {
946         if (!rec->errors && rec->checked && rec->found_inode_item &&
947             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
948                 return 1;
949         return 0;
950 }
951
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953                                  struct inode_record *rec)
954 {
955         struct cache_extent *cache;
956         struct inode_backref *tmp, *backref;
957         struct ptr_node *node;
958         u8 filetype;
959
960         if (!rec->found_inode_item)
961                 return;
962
963         filetype = imode_to_type(rec->imode);
964         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965                 if (backref->found_dir_item && backref->found_dir_index) {
966                         if (backref->filetype != filetype)
967                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968                         if (!backref->errors && backref->found_inode_ref &&
969                             rec->nlink == rec->found_link) {
970                                 list_del(&backref->list);
971                                 free(backref);
972                         }
973                 }
974         }
975
976         if (!rec->checked || rec->merging)
977                 return;
978
979         if (S_ISDIR(rec->imode)) {
980                 if (rec->found_size != rec->isize)
981                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982                 if (rec->found_file_extent)
983                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
984         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985                 if (rec->found_dir_item)
986                         rec->errors |= I_ERR_ODD_DIR_ITEM;
987                 if (rec->found_size != rec->nbytes)
988                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989                 if (rec->nlink > 0 && !no_holes &&
990                     (rec->extent_end < rec->isize ||
991                      first_extent_gap(&rec->holes) < rec->isize))
992                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
993         }
994
995         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996                 if (rec->found_csum_item && rec->nodatasum)
997                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
998                 if (rec->some_csum_missing && !rec->nodatasum)
999                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1000         }
1001
1002         BUG_ON(rec->refs != 1);
1003         if (can_free_inode_rec(rec)) {
1004                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005                 node = container_of(cache, struct ptr_node, cache);
1006                 BUG_ON(node->data != rec);
1007                 remove_cache_extent(inode_cache, &node->cache);
1008                 free(node);
1009                 free_inode_rec(rec);
1010         }
1011 }
1012
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1014 {
1015         struct btrfs_path path;
1016         struct btrfs_key key;
1017         int ret;
1018
1019         key.objectid = BTRFS_ORPHAN_OBJECTID;
1020         key.type = BTRFS_ORPHAN_ITEM_KEY;
1021         key.offset = ino;
1022
1023         btrfs_init_path(&path);
1024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025         btrfs_release_path(&path);
1026         if (ret > 0)
1027                 ret = -ENOENT;
1028         return ret;
1029 }
1030
1031 static int process_inode_item(struct extent_buffer *eb,
1032                               int slot, struct btrfs_key *key,
1033                               struct shared_node *active_node)
1034 {
1035         struct inode_record *rec;
1036         struct btrfs_inode_item *item;
1037
1038         rec = active_node->current;
1039         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040         if (rec->found_inode_item) {
1041                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1042                 return 1;
1043         }
1044         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045         rec->nlink = btrfs_inode_nlink(eb, item);
1046         rec->isize = btrfs_inode_size(eb, item);
1047         rec->nbytes = btrfs_inode_nbytes(eb, item);
1048         rec->imode = btrfs_inode_mode(eb, item);
1049         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1050                 rec->nodatasum = 1;
1051         rec->found_inode_item = 1;
1052         if (rec->nlink == 0)
1053                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054         maybe_free_inode_rec(&active_node->inode_cache, rec);
1055         return 0;
1056 }
1057
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1059                                                 const char *name,
1060                                                 int namelen, u64 dir)
1061 {
1062         struct inode_backref *backref;
1063
1064         list_for_each_entry(backref, &rec->backrefs, list) {
1065                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1066                         break;
1067                 if (backref->dir != dir || backref->namelen != namelen)
1068                         continue;
1069                 if (memcmp(name, backref->name, namelen))
1070                         continue;
1071                 return backref;
1072         }
1073
1074         backref = malloc(sizeof(*backref) + namelen + 1);
1075         if (!backref)
1076                 return NULL;
1077         memset(backref, 0, sizeof(*backref));
1078         backref->dir = dir;
1079         backref->namelen = namelen;
1080         memcpy(backref->name, name, namelen);
1081         backref->name[namelen] = '\0';
1082         list_add_tail(&backref->list, &rec->backrefs);
1083         return backref;
1084 }
1085
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087                              u64 ino, u64 dir, u64 index,
1088                              const char *name, int namelen,
1089                              u8 filetype, u8 itemtype, int errors)
1090 {
1091         struct inode_record *rec;
1092         struct inode_backref *backref;
1093
1094         rec = get_inode_rec(inode_cache, ino, 1);
1095         BUG_ON(IS_ERR(rec));
1096         backref = get_inode_backref(rec, name, namelen, dir);
1097         BUG_ON(!backref);
1098         if (errors)
1099                 backref->errors |= errors;
1100         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101                 if (backref->found_dir_index)
1102                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103                 if (backref->found_inode_ref && backref->index != index)
1104                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1105                 if (backref->found_dir_item && backref->filetype != filetype)
1106                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1107
1108                 backref->index = index;
1109                 backref->filetype = filetype;
1110                 backref->found_dir_index = 1;
1111         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1112                 rec->found_link++;
1113                 if (backref->found_dir_item)
1114                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115                 if (backref->found_dir_index && backref->filetype != filetype)
1116                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1117
1118                 backref->filetype = filetype;
1119                 backref->found_dir_item = 1;
1120         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122                 if (backref->found_inode_ref)
1123                         backref->errors |= REF_ERR_DUP_INODE_REF;
1124                 if (backref->found_dir_index && backref->index != index)
1125                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1126                 else
1127                         backref->index = index;
1128
1129                 backref->ref_type = itemtype;
1130                 backref->found_inode_ref = 1;
1131         } else {
1132                 BUG_ON(1);
1133         }
1134
1135         maybe_free_inode_rec(inode_cache, rec);
1136         return 0;
1137 }
1138
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140                             struct cache_tree *dst_cache)
1141 {
1142         struct inode_backref *backref;
1143         u32 dir_count = 0;
1144         int ret = 0;
1145
1146         dst->merging = 1;
1147         list_for_each_entry(backref, &src->backrefs, list) {
1148                 if (backref->found_dir_index) {
1149                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1150                                         backref->index, backref->name,
1151                                         backref->namelen, backref->filetype,
1152                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1153                 }
1154                 if (backref->found_dir_item) {
1155                         dir_count++;
1156                         add_inode_backref(dst_cache, dst->ino,
1157                                         backref->dir, 0, backref->name,
1158                                         backref->namelen, backref->filetype,
1159                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1160                 }
1161                 if (backref->found_inode_ref) {
1162                         add_inode_backref(dst_cache, dst->ino,
1163                                         backref->dir, backref->index,
1164                                         backref->name, backref->namelen, 0,
1165                                         backref->ref_type, backref->errors);
1166                 }
1167         }
1168
1169         if (src->found_dir_item)
1170                 dst->found_dir_item = 1;
1171         if (src->found_file_extent)
1172                 dst->found_file_extent = 1;
1173         if (src->found_csum_item)
1174                 dst->found_csum_item = 1;
1175         if (src->some_csum_missing)
1176                 dst->some_csum_missing = 1;
1177         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1179                 if (ret < 0)
1180                         return ret;
1181         }
1182
1183         BUG_ON(src->found_link < dir_count);
1184         dst->found_link += src->found_link - dir_count;
1185         dst->found_size += src->found_size;
1186         if (src->extent_start != (u64)-1) {
1187                 if (dst->extent_start == (u64)-1) {
1188                         dst->extent_start = src->extent_start;
1189                         dst->extent_end = src->extent_end;
1190                 } else {
1191                         if (dst->extent_end > src->extent_start)
1192                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193                         else if (dst->extent_end < src->extent_start) {
1194                                 ret = add_file_extent_hole(&dst->holes,
1195                                         dst->extent_end,
1196                                         src->extent_start - dst->extent_end);
1197                         }
1198                         if (dst->extent_end < src->extent_end)
1199                                 dst->extent_end = src->extent_end;
1200                 }
1201         }
1202
1203         dst->errors |= src->errors;
1204         if (src->found_inode_item) {
1205                 if (!dst->found_inode_item) {
1206                         dst->nlink = src->nlink;
1207                         dst->isize = src->isize;
1208                         dst->nbytes = src->nbytes;
1209                         dst->imode = src->imode;
1210                         dst->nodatasum = src->nodatasum;
1211                         dst->found_inode_item = 1;
1212                 } else {
1213                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1214                 }
1215         }
1216         dst->merging = 0;
1217
1218         return 0;
1219 }
1220
1221 static int splice_shared_node(struct shared_node *src_node,
1222                               struct shared_node *dst_node)
1223 {
1224         struct cache_extent *cache;
1225         struct ptr_node *node, *ins;
1226         struct cache_tree *src, *dst;
1227         struct inode_record *rec, *conflict;
1228         u64 current_ino = 0;
1229         int splice = 0;
1230         int ret;
1231
1232         if (--src_node->refs == 0)
1233                 splice = 1;
1234         if (src_node->current)
1235                 current_ino = src_node->current->ino;
1236
1237         src = &src_node->root_cache;
1238         dst = &dst_node->root_cache;
1239 again:
1240         cache = search_cache_extent(src, 0);
1241         while (cache) {
1242                 node = container_of(cache, struct ptr_node, cache);
1243                 rec = node->data;
1244                 cache = next_cache_extent(cache);
1245
1246                 if (splice) {
1247                         remove_cache_extent(src, &node->cache);
1248                         ins = node;
1249                 } else {
1250                         ins = malloc(sizeof(*ins));
1251                         BUG_ON(!ins);
1252                         ins->cache.start = node->cache.start;
1253                         ins->cache.size = node->cache.size;
1254                         ins->data = rec;
1255                         rec->refs++;
1256                 }
1257                 ret = insert_cache_extent(dst, &ins->cache);
1258                 if (ret == -EEXIST) {
1259                         conflict = get_inode_rec(dst, rec->ino, 1);
1260                         BUG_ON(IS_ERR(conflict));
1261                         merge_inode_recs(rec, conflict, dst);
1262                         if (rec->checked) {
1263                                 conflict->checked = 1;
1264                                 if (dst_node->current == conflict)
1265                                         dst_node->current = NULL;
1266                         }
1267                         maybe_free_inode_rec(dst, conflict);
1268                         free_inode_rec(rec);
1269                         free(ins);
1270                 } else {
1271                         BUG_ON(ret);
1272                 }
1273         }
1274
1275         if (src == &src_node->root_cache) {
1276                 src = &src_node->inode_cache;
1277                 dst = &dst_node->inode_cache;
1278                 goto again;
1279         }
1280
1281         if (current_ino > 0 && (!dst_node->current ||
1282             current_ino > dst_node->current->ino)) {
1283                 if (dst_node->current) {
1284                         dst_node->current->checked = 1;
1285                         maybe_free_inode_rec(dst, dst_node->current);
1286                 }
1287                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288                 BUG_ON(IS_ERR(dst_node->current));
1289         }
1290         return 0;
1291 }
1292
1293 static void free_inode_ptr(struct cache_extent *cache)
1294 {
1295         struct ptr_node *node;
1296         struct inode_record *rec;
1297
1298         node = container_of(cache, struct ptr_node, cache);
1299         rec = node->data;
1300         free_inode_rec(rec);
1301         free(node);
1302 }
1303
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1305
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1307                                             u64 bytenr)
1308 {
1309         struct cache_extent *cache;
1310         struct shared_node *node;
1311
1312         cache = lookup_cache_extent(shared, bytenr, 1);
1313         if (cache) {
1314                 node = container_of(cache, struct shared_node, cache);
1315                 return node;
1316         }
1317         return NULL;
1318 }
1319
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1321 {
1322         int ret;
1323         struct shared_node *node;
1324
1325         node = calloc(1, sizeof(*node));
1326         if (!node)
1327                 return -ENOMEM;
1328         node->cache.start = bytenr;
1329         node->cache.size = 1;
1330         cache_tree_init(&node->root_cache);
1331         cache_tree_init(&node->inode_cache);
1332         node->refs = refs;
1333
1334         ret = insert_cache_extent(shared, &node->cache);
1335
1336         return ret;
1337 }
1338
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340                              struct walk_control *wc, int level)
1341 {
1342         struct shared_node *node;
1343         struct shared_node *dest;
1344         int ret;
1345
1346         if (level == wc->active_node)
1347                 return 0;
1348
1349         BUG_ON(wc->active_node <= level);
1350         node = find_shared_node(&wc->shared, bytenr);
1351         if (!node) {
1352                 ret = add_shared_node(&wc->shared, bytenr, refs);
1353                 BUG_ON(ret);
1354                 node = find_shared_node(&wc->shared, bytenr);
1355                 wc->nodes[level] = node;
1356                 wc->active_node = level;
1357                 return 0;
1358         }
1359
1360         if (wc->root_level == wc->active_node &&
1361             btrfs_root_refs(&root->root_item) == 0) {
1362                 if (--node->refs == 0) {
1363                         free_inode_recs_tree(&node->root_cache);
1364                         free_inode_recs_tree(&node->inode_cache);
1365                         remove_cache_extent(&wc->shared, &node->cache);
1366                         free(node);
1367                 }
1368                 return 1;
1369         }
1370
1371         dest = wc->nodes[wc->active_node];
1372         splice_shared_node(node, dest);
1373         if (node->refs == 0) {
1374                 remove_cache_extent(&wc->shared, &node->cache);
1375                 free(node);
1376         }
1377         return 1;
1378 }
1379
1380 static int leave_shared_node(struct btrfs_root *root,
1381                              struct walk_control *wc, int level)
1382 {
1383         struct shared_node *node;
1384         struct shared_node *dest;
1385         int i;
1386
1387         if (level == wc->root_level)
1388                 return 0;
1389
1390         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1391                 if (wc->nodes[i])
1392                         break;
1393         }
1394         BUG_ON(i >= BTRFS_MAX_LEVEL);
1395
1396         node = wc->nodes[wc->active_node];
1397         wc->nodes[wc->active_node] = NULL;
1398         wc->active_node = i;
1399
1400         dest = wc->nodes[wc->active_node];
1401         if (wc->active_node < wc->root_level ||
1402             btrfs_root_refs(&root->root_item) > 0) {
1403                 BUG_ON(node->refs <= 1);
1404                 splice_shared_node(node, dest);
1405         } else {
1406                 BUG_ON(node->refs < 2);
1407                 node->refs--;
1408         }
1409         return 0;
1410 }
1411
1412 /*
1413  * Returns:
1414  * < 0 - on error
1415  * 1   - if the root with id child_root_id is a child of root parent_root_id
1416  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1417  *       has other root(s) as parent(s)
1418  * 2   - if the root child_root_id doesn't have any parent roots
1419  */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1421                          u64 child_root_id)
1422 {
1423         struct btrfs_path path;
1424         struct btrfs_key key;
1425         struct extent_buffer *leaf;
1426         int has_parent = 0;
1427         int ret;
1428
1429         btrfs_init_path(&path);
1430
1431         key.objectid = parent_root_id;
1432         key.type = BTRFS_ROOT_REF_KEY;
1433         key.offset = child_root_id;
1434         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1435                                 0, 0);
1436         if (ret < 0)
1437                 return ret;
1438         btrfs_release_path(&path);
1439         if (!ret)
1440                 return 1;
1441
1442         key.objectid = child_root_id;
1443         key.type = BTRFS_ROOT_BACKREF_KEY;
1444         key.offset = 0;
1445         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1446                                 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449
1450         while (1) {
1451                 leaf = path.nodes[0];
1452                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1454                         if (ret)
1455                                 break;
1456                         leaf = path.nodes[0];
1457                 }
1458
1459                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460                 if (key.objectid != child_root_id ||
1461                     key.type != BTRFS_ROOT_BACKREF_KEY)
1462                         break;
1463
1464                 has_parent = 1;
1465
1466                 if (key.offset == parent_root_id) {
1467                         btrfs_release_path(&path);
1468                         return 1;
1469                 }
1470
1471                 path.slots[0]++;
1472         }
1473 out:
1474         btrfs_release_path(&path);
1475         if (ret < 0)
1476                 return ret;
1477         return has_parent ? 0 : 2;
1478 }
1479
1480 static int process_dir_item(struct extent_buffer *eb,
1481                             int slot, struct btrfs_key *key,
1482                             struct shared_node *active_node)
1483 {
1484         u32 total;
1485         u32 cur = 0;
1486         u32 len;
1487         u32 name_len;
1488         u32 data_len;
1489         int error;
1490         int nritems = 0;
1491         u8 filetype;
1492         struct btrfs_dir_item *di;
1493         struct inode_record *rec;
1494         struct cache_tree *root_cache;
1495         struct cache_tree *inode_cache;
1496         struct btrfs_key location;
1497         char namebuf[BTRFS_NAME_LEN];
1498
1499         root_cache = &active_node->root_cache;
1500         inode_cache = &active_node->inode_cache;
1501         rec = active_node->current;
1502         rec->found_dir_item = 1;
1503
1504         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1505         total = btrfs_item_size_nr(eb, slot);
1506         while (cur < total) {
1507                 nritems++;
1508                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1509                 name_len = btrfs_dir_name_len(eb, di);
1510                 data_len = btrfs_dir_data_len(eb, di);
1511                 filetype = btrfs_dir_type(eb, di);
1512
1513                 rec->found_size += name_len;
1514                 if (cur + sizeof(*di) + name_len > total ||
1515                     name_len > BTRFS_NAME_LEN) {
1516                         error = REF_ERR_NAME_TOO_LONG;
1517
1518                         if (cur + sizeof(*di) > total)
1519                                 break;
1520                         len = min_t(u32, total - cur - sizeof(*di),
1521                                     BTRFS_NAME_LEN);
1522                 } else {
1523                         len = name_len;
1524                         error = 0;
1525                 }
1526
1527                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1528
1529                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1530                     key->offset != btrfs_name_hash(namebuf, len)) {
1531                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1532                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1533                         key->objectid, key->offset, namebuf, len, filetype,
1534                         key->offset, btrfs_name_hash(namebuf, len));
1535                 }
1536
1537                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1538                         add_inode_backref(inode_cache, location.objectid,
1539                                           key->objectid, key->offset, namebuf,
1540                                           len, filetype, key->type, error);
1541                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1542                         add_inode_backref(root_cache, location.objectid,
1543                                           key->objectid, key->offset,
1544                                           namebuf, len, filetype,
1545                                           key->type, error);
1546                 } else {
1547                         fprintf(stderr, "invalid location in dir item %u\n",
1548                                 location.type);
1549                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1550                                           key->objectid, key->offset, namebuf,
1551                                           len, filetype, key->type, error);
1552                 }
1553
1554                 len = sizeof(*di) + name_len + data_len;
1555                 di = (struct btrfs_dir_item *)((char *)di + len);
1556                 cur += len;
1557         }
1558         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1559                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1560
1561         return 0;
1562 }
1563
1564 static int process_inode_ref(struct extent_buffer *eb,
1565                              int slot, struct btrfs_key *key,
1566                              struct shared_node *active_node)
1567 {
1568         u32 total;
1569         u32 cur = 0;
1570         u32 len;
1571         u32 name_len;
1572         u64 index;
1573         int error;
1574         struct cache_tree *inode_cache;
1575         struct btrfs_inode_ref *ref;
1576         char namebuf[BTRFS_NAME_LEN];
1577
1578         inode_cache = &active_node->inode_cache;
1579
1580         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1581         total = btrfs_item_size_nr(eb, slot);
1582         while (cur < total) {
1583                 name_len = btrfs_inode_ref_name_len(eb, ref);
1584                 index = btrfs_inode_ref_index(eb, ref);
1585
1586                 /* inode_ref + namelen should not cross item boundary */
1587                 if (cur + sizeof(*ref) + name_len > total ||
1588                     name_len > BTRFS_NAME_LEN) {
1589                         if (total < cur + sizeof(*ref))
1590                                 break;
1591
1592                         /* Still try to read out the remaining part */
1593                         len = min_t(u32, total - cur - sizeof(*ref),
1594                                     BTRFS_NAME_LEN);
1595                         error = REF_ERR_NAME_TOO_LONG;
1596                 } else {
1597                         len = name_len;
1598                         error = 0;
1599                 }
1600
1601                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1602                 add_inode_backref(inode_cache, key->objectid, key->offset,
1603                                   index, namebuf, len, 0, key->type, error);
1604
1605                 len = sizeof(*ref) + name_len;
1606                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1607                 cur += len;
1608         }
1609         return 0;
1610 }
1611
1612 static int process_inode_extref(struct extent_buffer *eb,
1613                                 int slot, struct btrfs_key *key,
1614                                 struct shared_node *active_node)
1615 {
1616         u32 total;
1617         u32 cur = 0;
1618         u32 len;
1619         u32 name_len;
1620         u64 index;
1621         u64 parent;
1622         int error;
1623         struct cache_tree *inode_cache;
1624         struct btrfs_inode_extref *extref;
1625         char namebuf[BTRFS_NAME_LEN];
1626
1627         inode_cache = &active_node->inode_cache;
1628
1629         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1630         total = btrfs_item_size_nr(eb, slot);
1631         while (cur < total) {
1632                 name_len = btrfs_inode_extref_name_len(eb, extref);
1633                 index = btrfs_inode_extref_index(eb, extref);
1634                 parent = btrfs_inode_extref_parent(eb, extref);
1635                 if (name_len <= BTRFS_NAME_LEN) {
1636                         len = name_len;
1637                         error = 0;
1638                 } else {
1639                         len = BTRFS_NAME_LEN;
1640                         error = REF_ERR_NAME_TOO_LONG;
1641                 }
1642                 read_extent_buffer(eb, namebuf,
1643                                    (unsigned long)(extref + 1), len);
1644                 add_inode_backref(inode_cache, key->objectid, parent,
1645                                   index, namebuf, len, 0, key->type, error);
1646
1647                 len = sizeof(*extref) + name_len;
1648                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1649                 cur += len;
1650         }
1651         return 0;
1652
1653 }
1654
1655 static int count_csum_range(struct btrfs_root *root, u64 start,
1656                             u64 len, u64 *found)
1657 {
1658         struct btrfs_key key;
1659         struct btrfs_path path;
1660         struct extent_buffer *leaf;
1661         int ret;
1662         size_t size;
1663         *found = 0;
1664         u64 csum_end;
1665         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1666
1667         btrfs_init_path(&path);
1668
1669         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1670         key.offset = start;
1671         key.type = BTRFS_EXTENT_CSUM_KEY;
1672
1673         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1674                                 &key, &path, 0, 0);
1675         if (ret < 0)
1676                 goto out;
1677         if (ret > 0 && path.slots[0] > 0) {
1678                 leaf = path.nodes[0];
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1680                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1681                     key.type == BTRFS_EXTENT_CSUM_KEY)
1682                         path.slots[0]--;
1683         }
1684
1685         while (len > 0) {
1686                 leaf = path.nodes[0];
1687                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1688                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1689                         if (ret > 0)
1690                                 break;
1691                         else if (ret < 0)
1692                                 goto out;
1693                         leaf = path.nodes[0];
1694                 }
1695
1696                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1697                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1698                     key.type != BTRFS_EXTENT_CSUM_KEY)
1699                         break;
1700
1701                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1702                 if (key.offset >= start + len)
1703                         break;
1704
1705                 if (key.offset > start)
1706                         start = key.offset;
1707
1708                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1709                 csum_end = key.offset + (size / csum_size) *
1710                            root->fs_info->sectorsize;
1711                 if (csum_end > start) {
1712                         size = min(csum_end - start, len);
1713                         len -= size;
1714                         start += size;
1715                         *found += size;
1716                 }
1717
1718                 path.slots[0]++;
1719         }
1720 out:
1721         btrfs_release_path(&path);
1722         if (ret < 0)
1723                 return ret;
1724         return 0;
1725 }
1726
1727 static int process_file_extent(struct btrfs_root *root,
1728                                 struct extent_buffer *eb,
1729                                 int slot, struct btrfs_key *key,
1730                                 struct shared_node *active_node)
1731 {
1732         struct inode_record *rec;
1733         struct btrfs_file_extent_item *fi;
1734         u64 num_bytes = 0;
1735         u64 disk_bytenr = 0;
1736         u64 extent_offset = 0;
1737         u64 mask = root->fs_info->sectorsize - 1;
1738         int extent_type;
1739         int ret;
1740
1741         rec = active_node->current;
1742         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743         rec->found_file_extent = 1;
1744
1745         if (rec->extent_start == (u64)-1) {
1746                 rec->extent_start = key->offset;
1747                 rec->extent_end = key->offset;
1748         }
1749
1750         if (rec->extent_end > key->offset)
1751                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752         else if (rec->extent_end < key->offset) {
1753                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754                                            key->offset - rec->extent_end);
1755                 if (ret < 0)
1756                         return ret;
1757         }
1758
1759         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760         extent_type = btrfs_file_extent_type(eb, fi);
1761
1762         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1764                 if (num_bytes == 0)
1765                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766                 rec->found_size += num_bytes;
1767                 num_bytes = (num_bytes + mask) & ~mask;
1768         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772                 extent_offset = btrfs_file_extent_offset(eb, fi);
1773                 if (num_bytes == 0 || (num_bytes & mask))
1774                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775                 if (num_bytes + extent_offset >
1776                     btrfs_file_extent_ram_bytes(eb, fi))
1777                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779                     (btrfs_file_extent_compression(eb, fi) ||
1780                      btrfs_file_extent_encryption(eb, fi) ||
1781                      btrfs_file_extent_other_encoding(eb, fi)))
1782                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1783                 if (disk_bytenr > 0)
1784                         rec->found_size += num_bytes;
1785         } else {
1786                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1787         }
1788         rec->extent_end = key->offset + num_bytes;
1789
1790         /*
1791          * The data reloc tree will copy full extents into its inode and then
1792          * copy the corresponding csums.  Because the extent it copied could be
1793          * a preallocated extent that hasn't been written to yet there may be no
1794          * csums to copy, ergo we won't have csums for our file extent.  This is
1795          * ok so just don't bother checking csums if the inode belongs to the
1796          * data reloc tree.
1797          */
1798         if (disk_bytenr > 0 &&
1799             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1800                 u64 found;
1801                 if (btrfs_file_extent_compression(eb, fi))
1802                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1803                 else
1804                         disk_bytenr += extent_offset;
1805
1806                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1807                 if (ret < 0)
1808                         return ret;
1809                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1810                         if (found > 0)
1811                                 rec->found_csum_item = 1;
1812                         if (found < num_bytes)
1813                                 rec->some_csum_missing = 1;
1814                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1815                         if (found > 0)
1816                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1817                 }
1818         }
1819         return 0;
1820 }
1821
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823                             struct walk_control *wc)
1824 {
1825         struct btrfs_key key;
1826         u32 nritems;
1827         int i;
1828         int ret = 0;
1829         struct cache_tree *inode_cache;
1830         struct shared_node *active_node;
1831
1832         if (wc->root_level == wc->active_node &&
1833             btrfs_root_refs(&root->root_item) == 0)
1834                 return 0;
1835
1836         active_node = wc->nodes[wc->active_node];
1837         inode_cache = &active_node->inode_cache;
1838         nritems = btrfs_header_nritems(eb);
1839         for (i = 0; i < nritems; i++) {
1840                 btrfs_item_key_to_cpu(eb, &key, i);
1841
1842                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1843                         continue;
1844                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1845                         continue;
1846
1847                 if (active_node->current == NULL ||
1848                     active_node->current->ino < key.objectid) {
1849                         if (active_node->current) {
1850                                 active_node->current->checked = 1;
1851                                 maybe_free_inode_rec(inode_cache,
1852                                                      active_node->current);
1853                         }
1854                         active_node->current = get_inode_rec(inode_cache,
1855                                                              key.objectid, 1);
1856                         BUG_ON(IS_ERR(active_node->current));
1857                 }
1858                 switch (key.type) {
1859                 case BTRFS_DIR_ITEM_KEY:
1860                 case BTRFS_DIR_INDEX_KEY:
1861                         ret = process_dir_item(eb, i, &key, active_node);
1862                         break;
1863                 case BTRFS_INODE_REF_KEY:
1864                         ret = process_inode_ref(eb, i, &key, active_node);
1865                         break;
1866                 case BTRFS_INODE_EXTREF_KEY:
1867                         ret = process_inode_extref(eb, i, &key, active_node);
1868                         break;
1869                 case BTRFS_INODE_ITEM_KEY:
1870                         ret = process_inode_item(eb, i, &key, active_node);
1871                         break;
1872                 case BTRFS_EXTENT_DATA_KEY:
1873                         ret = process_file_extent(root, eb, i, &key,
1874                                                   active_node);
1875                         break;
1876                 default:
1877                         break;
1878                 };
1879         }
1880         return ret;
1881 }
1882
/*
 * Per-level cache of extent reference information for tree blocks met while
 * walking a tree.  Each array is indexed by tree level.
 */
struct node_refs {
        /* start bytenr of the block last looked up at each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count obtained from btrfs_lookup_extent_info() */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero if the block has to be checked while walking this root */
        int need_check[BTRFS_MAX_LEVEL];
};
1888
/* Forward declarations: both are used before their definitions below. */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
1893
1894 /*
1895  * Returns >0  Found error, not fatal, should continue
1896  * Returns <0  Fatal error, must exit the whole check
1897  * Returns 0   No errors found
1898  */
1899 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1900                                struct node_refs *nrefs, int *level, int ext_ref)
1901 {
1902         struct extent_buffer *cur = path->nodes[0];
1903         struct btrfs_key key;
1904         u64 cur_bytenr;
1905         u32 nritems;
1906         u64 first_ino = 0;
1907         int root_level = btrfs_header_level(root->node);
1908         int i;
1909         int ret = 0; /* Final return value */
1910         int err = 0; /* Positive error bitmap */
1911
1912         cur_bytenr = cur->start;
1913
1914         /* skip to first inode item or the first inode number change */
1915         nritems = btrfs_header_nritems(cur);
1916         for (i = 0; i < nritems; i++) {
1917                 btrfs_item_key_to_cpu(cur, &key, i);
1918                 if (i == 0)
1919                         first_ino = key.objectid;
1920                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1921                     (first_ino && first_ino != key.objectid))
1922                         break;
1923         }
1924         if (i == nritems) {
1925                 path->slots[0] = nritems;
1926                 return 0;
1927         }
1928         path->slots[0] = i;
1929
1930 again:
1931         err |= check_inode_item(root, path, ext_ref);
1932
1933         if (err & LAST_ITEM)
1934                 goto out;
1935
1936         /* still have inode items in thie leaf */
1937         if (cur->start == cur_bytenr)
1938                 goto again;
1939
1940         /*
1941          * we have switched to another leaf, above nodes may
1942          * have changed, here walk down the path, if a node
1943          * or leaf is shared, check whether we can skip this
1944          * node or leaf.
1945          */
1946         for (i = root_level; i >= 0; i--) {
1947                 if (path->nodes[i]->start == nrefs->bytenr[i])
1948                         continue;
1949
1950                 ret = update_nodes_refs(root,
1951                                 path->nodes[i]->start,
1952                                 nrefs, i);
1953                 if (ret)
1954                         goto out;
1955
1956                 if (!nrefs->need_check[i]) {
1957                         *level += 1;
1958                         break;
1959                 }
1960         }
1961
1962         for (i = 0; i < *level; i++) {
1963                 free_extent_buffer(path->nodes[i]);
1964                 path->nodes[i] = NULL;
1965         }
1966 out:
1967         err &= ~LAST_ITEM;
1968         if (err && !ret)
1969                 ret = err;
1970         return ret;
1971 }
1972
1973 static void reada_walk_down(struct btrfs_root *root,
1974                             struct extent_buffer *node, int slot)
1975 {
1976         struct btrfs_fs_info *fs_info = root->fs_info;
1977         u64 bytenr;
1978         u64 ptr_gen;
1979         u32 nritems;
1980         int i;
1981         int level;
1982
1983         level = btrfs_header_level(node);
1984         if (level != 1)
1985                 return;
1986
1987         nritems = btrfs_header_nritems(node);
1988         for (i = slot; i < nritems; i++) {
1989                 bytenr = btrfs_node_blockptr(node, i);
1990                 ptr_gen = btrfs_node_ptr_generation(node, i);
1991                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1992         }
1993 }
1994
1995 /*
1996  * Check the child node/leaf by the following condition:
1997  * 1. the first item key of the node/leaf should be the same with the one
1998  *    in parent.
1999  * 2. block in parent node should match the child node/leaf.
2000  * 3. generation of parent node and child's header should be consistent.
2001  *
2002  * Or the child node/leaf pointed by the key in parent is not valid.
2003  *
2004  * We hope to check leaf owner too, but since subvol may share leaves,
2005  * which makes leaf owner check not so strong, key check should be
2006  * sufficient enough for that case.
2007  */
2008 static int check_child_node(struct extent_buffer *parent, int slot,
2009                             struct extent_buffer *child)
2010 {
2011         struct btrfs_key parent_key;
2012         struct btrfs_key child_key;
2013         int ret = 0;
2014
2015         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2016         if (btrfs_header_level(child) == 0)
2017                 btrfs_item_key_to_cpu(child, &child_key, 0);
2018         else
2019                 btrfs_node_key_to_cpu(child, &child_key, 0);
2020
2021         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2022                 ret = -EINVAL;
2023                 fprintf(stderr,
2024                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2025                         parent_key.objectid, parent_key.type, parent_key.offset,
2026                         child_key.objectid, child_key.type, child_key.offset);
2027         }
2028         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2029                 ret = -EINVAL;
2030                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2031                         btrfs_node_blockptr(parent, slot),
2032                         btrfs_header_bytenr(child));
2033         }
2034         if (btrfs_node_ptr_generation(parent, slot) !=
2035             btrfs_header_generation(child)) {
2036                 ret = -EINVAL;
2037                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2038                         btrfs_header_generation(child),
2039                         btrfs_node_ptr_generation(parent, slot));
2040         }
2041         return ret;
2042 }
2043
2044 /*
2045  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2046  * in every fs or file tree check. Here we find its all root ids, and only check
2047  * it in the fs or file tree which has the smallest root id.
2048  */
2049 static int need_check(struct btrfs_root *root, struct ulist *roots)
2050 {
2051         struct rb_node *node;
2052         struct ulist_node *u;
2053
2054         if (roots->nnodes == 1)
2055                 return 1;
2056
2057         node = rb_first(&roots->root);
2058         u = rb_entry(node, struct ulist_node, rb_node);
2059         /*
2060          * current root id is not smallest, we skip it and let it be checked
2061          * in the fs or file tree who hash the smallest root id.
2062          */
2063         if (root->objectid != u->val)
2064                 return 0;
2065
2066         return 1;
2067 }
2068
2069 /*
2070  * for a tree node or leaf, we record its reference count, so later if we still
2071  * process this node or leaf, don't need to compute its reference count again.
2072  */
2073 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2074                              struct node_refs *nrefs, u64 level)
2075 {
2076         int check, ret;
2077         u64 refs;
2078         struct ulist *roots;
2079
2080         if (nrefs->bytenr[level] != bytenr) {
2081                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2082                                        level, 1, &refs, NULL);
2083                 if (ret < 0)
2084                         return ret;
2085
2086                 nrefs->bytenr[level] = bytenr;
2087                 nrefs->refs[level] = refs;
2088                 if (refs > 1) {
2089                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2090                                                    0, &roots);
2091                         if (ret)
2092                                 return -EIO;
2093
2094                         check = need_check(root, roots);
2095                         ulist_free(roots);
2096                         nrefs->need_check[level] = check;
2097                 } else {
2098                         nrefs->need_check[level] = 1;
2099                 }
2100         }
2101
2102         return 0;
2103 }
2104
/*
 * Walk down from path->nodes[*level] along path->slots[] until a leaf is
 * reached and processed, or the current block has no more slots to follow.
 * Reference counts of visited blocks are cached in @nrefs, and blocks with
 * more than one reference go through enter_shared_node().
 *
 * On return path->slots[*level] is set to the item count of
 * path->nodes[*level].
 *
 * Returns 0 on success, a negative value on failure (extent lookup of the
 * starting block, unreadable or inconsistent child block, or an error from
 * process_one_leaf()), or the positive value returned by
 * enter_shared_node() for the starting block.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
                          struct walk_control *wc, int *level,
                          struct node_refs *nrefs)
{
        enum btrfs_tree_block_status status;
        u64 bytenr;
        u64 ptr_gen;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *next;
        struct extent_buffer *cur;
        int ret, err = 0;
        u64 refs;

        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);

        /* Reuse the cached reference count if it describes this block */
        if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
                refs = nrefs->refs[*level];
                ret = 0;
        } else {
                ret = btrfs_lookup_extent_info(NULL, root,
                                       path->nodes[*level]->start,
                                       *level, 1, &refs, NULL);
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                nrefs->bytenr[*level] = path->nodes[*level]->start;
                nrefs->refs[*level] = refs;
        }

        if (refs > 1) {
                ret = enter_shared_node(root, path->nodes[*level]->start,
                                        refs, wc, *level);
                if (ret > 0) {
                        err = ret;
                        goto out;
                }
        }

        while (*level >= 0) {
                WARN_ON(*level < 0);
                WARN_ON(*level >= BTRFS_MAX_LEVEL);
                cur = path->nodes[*level];

                if (btrfs_header_level(cur) != *level)
                        WARN_ON(1);

                /* All slots of this block have been visited */
                if (path->slots[*level] >= btrfs_header_nritems(cur))
                        break;
                if (*level == 0) {
                        ret = process_one_leaf(root, cur, wc);
                        if (ret < 0)
                                err = ret;
                        break;
                }
                bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
                ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);

                if (bytenr == nrefs->bytenr[*level - 1]) {
                        refs = nrefs->refs[*level - 1];
                } else {
                        ret = btrfs_lookup_extent_info(NULL, root, bytenr,
                                        *level - 1, 1, &refs, NULL);
                        if (ret < 0) {
                                /* Lookup failed: go on with refs = 0 */
                                refs = 0;
                        } else {
                                nrefs->bytenr[*level - 1] = bytenr;
                                nrefs->refs[*level - 1] = refs;
                        }
                }

                if (refs > 1) {
                        ret = enter_shared_node(root, bytenr, refs,
                                                wc, *level - 1);
                        /* Positive return: skip this child, try next slot */
                        if (ret > 0) {
                                path->slots[*level]++;
                                continue;
                        }
                }

                next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
                if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
                        free_extent_buffer(next);
                        reada_walk_down(root, cur, path->slots[*level]);
                        next = read_tree_block(root->fs_info, bytenr, ptr_gen);
                        if (!extent_buffer_uptodate(next)) {
                                struct btrfs_key node_key;

                                /* Record the parent whose child is unreadable */
                                btrfs_node_key_to_cpu(path->nodes[*level],
                                                      &node_key,
                                                      path->slots[*level]);
                                btrfs_add_corrupt_extent_record(root->fs_info,
                                                &node_key,
                                                path->nodes[*level]->start,
                                                root->fs_info->nodesize,
                                                *level);
                                err = -EIO;
                                goto out;
                        }
                }

                ret = check_child_node(cur, path->slots[*level], next);
                if (ret) {
                        free_extent_buffer(next);
                        err = ret;
                        goto out;
                }

                if (btrfs_is_leaf(next))
                        status = btrfs_check_leaf(root, NULL, next);
                else
                        status = btrfs_check_node(root, NULL, next);
                if (status != BTRFS_TREE_BLOCK_CLEAN) {
                        free_extent_buffer(next);
                        err = -EIO;
                        goto out;
                }

                /* Descend: install the child in the path one level below */
                *level = *level - 1;
                free_extent_buffer(path->nodes[*level]);
                path->nodes[*level] = next;
                path->slots[*level] = 0;
        }
out:
        /* Mark the current block as fully consumed */
        path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
        return err;
}
2233
/*
 * Forward declaration of check_inode_item(); the same prototype is already
 * declared earlier in this file.
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
2236
2237 /*
2238  * Returns >0  Found error, should continue
2239  * Returns <0  Fatal error, must exit the whole check
2240  * Returns 0   No errors found
2241  */
2242 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2243                              int *level, struct node_refs *nrefs, int ext_ref)
2244 {
2245         enum btrfs_tree_block_status status;
2246         u64 bytenr;
2247         u64 ptr_gen;
2248         struct btrfs_fs_info *fs_info = root->fs_info;
2249         struct extent_buffer *next;
2250         struct extent_buffer *cur;
2251         int ret;
2252
2253         WARN_ON(*level < 0);
2254         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2255
2256         ret = update_nodes_refs(root, path->nodes[*level]->start,
2257                                 nrefs, *level);
2258         if (ret < 0)
2259                 return ret;
2260
2261         while (*level >= 0) {
2262                 WARN_ON(*level < 0);
2263                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2264                 cur = path->nodes[*level];
2265
2266                 if (btrfs_header_level(cur) != *level)
2267                         WARN_ON(1);
2268
2269                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2270                         break;
2271                 /* Don't forgot to check leaf/node validation */
2272                 if (*level == 0) {
2273                         ret = btrfs_check_leaf(root, NULL, cur);
2274                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2275                                 ret = -EIO;
2276                                 break;
2277                         }
2278                         ret = process_one_leaf_v2(root, path, nrefs,
2279                                                   level, ext_ref);
2280                         break;
2281                 } else {
2282                         ret = btrfs_check_node(root, NULL, cur);
2283                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2284                                 ret = -EIO;
2285                                 break;
2286                         }
2287                 }
2288                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2289                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2290
2291                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2292                 if (ret)
2293                         break;
2294                 if (!nrefs->need_check[*level - 1]) {
2295                         path->slots[*level]++;
2296                         continue;
2297                 }
2298
2299                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2300                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2301                         free_extent_buffer(next);
2302                         reada_walk_down(root, cur, path->slots[*level]);
2303                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2304                         if (!extent_buffer_uptodate(next)) {
2305                                 struct btrfs_key node_key;
2306
2307                                 btrfs_node_key_to_cpu(path->nodes[*level],
2308                                                       &node_key,
2309                                                       path->slots[*level]);
2310                                 btrfs_add_corrupt_extent_record(fs_info,
2311                                                 &node_key,
2312                                                 path->nodes[*level]->start,
2313                                                 fs_info->nodesize,
2314                                                 *level);
2315                                 ret = -EIO;
2316                                 break;
2317                         }
2318                 }
2319
2320                 ret = check_child_node(cur, path->slots[*level], next);
2321                 if (ret < 0) 
2322                         break;
2323
2324                 if (btrfs_is_leaf(next))
2325                         status = btrfs_check_leaf(root, NULL, next);
2326                 else
2327                         status = btrfs_check_node(root, NULL, next);
2328                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2329                         free_extent_buffer(next);
2330                         ret = -EIO;
2331                         break;
2332                 }
2333
2334                 *level = *level - 1;
2335                 free_extent_buffer(path->nodes[*level]);
2336                 path->nodes[*level] = next;
2337                 path->slots[*level] = 0;
2338         }
2339         return ret;
2340 }
2341
2342 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2343                         struct walk_control *wc, int *level)
2344 {
2345         int i;
2346         struct extent_buffer *leaf;
2347
2348         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349                 leaf = path->nodes[i];
2350                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2351                         path->slots[i]++;
2352                         *level = i;
2353                         return 0;
2354                 } else {
2355                         free_extent_buffer(path->nodes[*level]);
2356                         path->nodes[*level] = NULL;
2357                         BUG_ON(*level > wc->active_node);
2358                         if (*level == wc->active_node)
2359                                 leave_shared_node(root, wc, *level);
2360                         *level = i + 1;
2361                 }
2362         }
2363         return 1;
2364 }
2365
2366 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2367                            int *level)
2368 {
2369         int i;
2370         struct extent_buffer *leaf;
2371
2372         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2373                 leaf = path->nodes[i];
2374                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2375                         path->slots[i]++;
2376                         *level = i;
2377                         return 0;
2378                 } else {
2379                         free_extent_buffer(path->nodes[*level]);
2380                         path->nodes[*level] = NULL;
2381                         *level = i + 1;
2382                 }
2383         }
2384         return 1;
2385 }
2386
2387 static int check_root_dir(struct inode_record *rec)
2388 {
2389         struct inode_backref *backref;
2390         int ret = -1;
2391
2392         if (!rec->found_inode_item || rec->errors)
2393                 goto out;
2394         if (rec->nlink != 1 || rec->found_link != 0)
2395                 goto out;
2396         if (list_empty(&rec->backrefs))
2397                 goto out;
2398         backref = to_inode_backref(rec->backrefs.next);
2399         if (!backref->found_inode_ref)
2400                 goto out;
2401         if (backref->index != 0 || backref->namelen != 2 ||
2402             memcmp(backref->name, "..", 2))
2403                 goto out;
2404         if (backref->found_dir_index || backref->found_dir_item)
2405                 goto out;
2406         ret = 0;
2407 out:
2408         return ret;
2409 }
2410
2411 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2412                               struct btrfs_root *root, struct btrfs_path *path,
2413                               struct inode_record *rec)
2414 {
2415         struct btrfs_inode_item *ei;
2416         struct btrfs_key key;
2417         int ret;
2418
2419         key.objectid = rec->ino;
2420         key.type = BTRFS_INODE_ITEM_KEY;
2421         key.offset = (u64)-1;
2422
2423         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2424         if (ret < 0)
2425                 goto out;
2426         if (ret) {
2427                 if (!path->slots[0]) {
2428                         ret = -ENOENT;
2429                         goto out;
2430                 }
2431                 path->slots[0]--;
2432                 ret = 0;
2433         }
2434         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2435         if (key.objectid != rec->ino) {
2436                 ret = -ENOENT;
2437                 goto out;
2438         }
2439
2440         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2441                             struct btrfs_inode_item);
2442         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2443         btrfs_mark_buffer_dirty(path->nodes[0]);
2444         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2445         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2446                root->root_key.objectid);
2447 out:
2448         btrfs_release_path(path);
2449         return ret;
2450 }
2451
2452 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2453                                     struct btrfs_root *root,
2454                                     struct btrfs_path *path,
2455                                     struct inode_record *rec)
2456 {
2457         int ret;
2458
2459         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2460         btrfs_release_path(path);
2461         if (!ret)
2462                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2463         return ret;
2464 }
2465
2466 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2467                                struct btrfs_root *root,
2468                                struct btrfs_path *path,
2469                                struct inode_record *rec)
2470 {
2471         struct btrfs_inode_item *ei;
2472         struct btrfs_key key;
2473         int ret = 0;
2474
2475         key.objectid = rec->ino;
2476         key.type = BTRFS_INODE_ITEM_KEY;
2477         key.offset = 0;
2478
2479         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2480         if (ret) {
2481                 if (ret > 0)
2482                         ret = -ENOENT;
2483                 goto out;
2484         }
2485
2486         /* Since ret == 0, no need to check anything */
2487         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2488                             struct btrfs_inode_item);
2489         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2490         btrfs_mark_buffer_dirty(path->nodes[0]);
2491         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2492         printf("reset nbytes for ino %llu root %llu\n",
2493                rec->ino, root->root_key.objectid);
2494 out:
2495         btrfs_release_path(path);
2496         return ret;
2497 }
2498
2499 static int add_missing_dir_index(struct btrfs_root *root,
2500                                  struct cache_tree *inode_cache,
2501                                  struct inode_record *rec,
2502                                  struct inode_backref *backref)
2503 {
2504         struct btrfs_path path;
2505         struct btrfs_trans_handle *trans;
2506         struct btrfs_dir_item *dir_item;
2507         struct extent_buffer *leaf;
2508         struct btrfs_key key;
2509         struct btrfs_disk_key disk_key;
2510         struct inode_record *dir_rec;
2511         unsigned long name_ptr;
2512         u32 data_size = sizeof(*dir_item) + backref->namelen;
2513         int ret;
2514
2515         trans = btrfs_start_transaction(root, 1);
2516         if (IS_ERR(trans))
2517                 return PTR_ERR(trans);
2518
2519         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2520                 (unsigned long long)rec->ino);
2521
2522         btrfs_init_path(&path);
2523         key.objectid = backref->dir;
2524         key.type = BTRFS_DIR_INDEX_KEY;
2525         key.offset = backref->index;
2526         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2527         BUG_ON(ret);
2528
2529         leaf = path.nodes[0];
2530         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2531
2532         disk_key.objectid = cpu_to_le64(rec->ino);
2533         disk_key.type = BTRFS_INODE_ITEM_KEY;
2534         disk_key.offset = 0;
2535
2536         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2537         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2538         btrfs_set_dir_data_len(leaf, dir_item, 0);
2539         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2540         name_ptr = (unsigned long)(dir_item + 1);
2541         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2542         btrfs_mark_buffer_dirty(leaf);
2543         btrfs_release_path(&path);
2544         btrfs_commit_transaction(trans, root);
2545
2546         backref->found_dir_index = 1;
2547         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2548         BUG_ON(IS_ERR(dir_rec));
2549         if (!dir_rec)
2550                 return 0;
2551         dir_rec->found_size += backref->namelen;
2552         if (dir_rec->found_size == dir_rec->isize &&
2553             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2554                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2555         if (dir_rec->found_size != dir_rec->isize)
2556                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2557
2558         return 0;
2559 }
2560
2561 static int delete_dir_index(struct btrfs_root *root,
2562                             struct inode_backref *backref)
2563 {
2564         struct btrfs_trans_handle *trans;
2565         struct btrfs_dir_item *di;
2566         struct btrfs_path path;
2567         int ret = 0;
2568
2569         trans = btrfs_start_transaction(root, 1);
2570         if (IS_ERR(trans))
2571                 return PTR_ERR(trans);
2572
2573         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2574                 (unsigned long long)backref->dir,
2575                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2576                 (unsigned long long)root->objectid);
2577
2578         btrfs_init_path(&path);
2579         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2580                                     backref->name, backref->namelen,
2581                                     backref->index, -1);
2582         if (IS_ERR(di)) {
2583                 ret = PTR_ERR(di);
2584                 btrfs_release_path(&path);
2585                 btrfs_commit_transaction(trans, root);
2586                 if (ret == -ENOENT)
2587                         return 0;
2588                 return ret;
2589         }
2590
2591         if (!di)
2592                 ret = btrfs_del_item(trans, root, &path);
2593         else
2594                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2595         BUG_ON(ret);
2596         btrfs_release_path(&path);
2597         btrfs_commit_transaction(trans, root);
2598         return ret;
2599 }
2600
2601 static int create_inode_item(struct btrfs_root *root,
2602                              struct inode_record *rec,
2603                              int root_dir)
2604 {
2605         struct btrfs_trans_handle *trans;
2606         struct btrfs_inode_item inode_item;
2607         time_t now = time(NULL);
2608         int ret;
2609
2610         trans = btrfs_start_transaction(root, 1);
2611         if (IS_ERR(trans)) {
2612                 ret = PTR_ERR(trans);
2613                 return ret;
2614         }
2615
2616         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2617                 "be incomplete, please check permissions and content after "
2618                 "the fsck completes.\n", (unsigned long long)root->objectid,
2619                 (unsigned long long)rec->ino);
2620
2621         memset(&inode_item, 0, sizeof(inode_item));
2622         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2623         if (root_dir)
2624                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2625         else
2626                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2627         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2628         if (rec->found_dir_item) {
2629                 if (rec->found_file_extent)
2630                         fprintf(stderr, "root %llu inode %llu has both a dir "
2631                                 "item and extents, unsure if it is a dir or a "
2632                                 "regular file so setting it as a directory\n",
2633                                 (unsigned long long)root->objectid,
2634                                 (unsigned long long)rec->ino);
2635                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2636                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2637         } else if (!rec->found_dir_item) {
2638                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2639                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2640         }
2641         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2642         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2643         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2644         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2645         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2646         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2647         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2648         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2649
2650         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2651         BUG_ON(ret);
2652         btrfs_commit_transaction(trans, root);
2653         return 0;
2654 }
2655
2656 static int repair_inode_backrefs(struct btrfs_root *root,
2657                                  struct inode_record *rec,
2658                                  struct cache_tree *inode_cache,
2659                                  int delete)
2660 {
2661         struct inode_backref *tmp, *backref;
2662         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2663         int ret = 0;
2664         int repaired = 0;
2665
2666         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2667                 if (!delete && rec->ino == root_dirid) {
2668                         if (!rec->found_inode_item) {
2669                                 ret = create_inode_item(root, rec, 1);
2670                                 if (ret)
2671                                         break;
2672                                 repaired++;
2673                         }
2674                 }
2675
2676                 /* Index 0 for root dir's are special, don't mess with it */
2677                 if (rec->ino == root_dirid && backref->index == 0)
2678                         continue;
2679
2680                 if (delete &&
2681                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2682                      (backref->found_dir_index && backref->found_inode_ref &&
2683                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2684                         ret = delete_dir_index(root, backref);
2685                         if (ret)
2686                                 break;
2687                         repaired++;
2688                         list_del(&backref->list);
2689                         free(backref);
2690                         continue;
2691                 }
2692
2693                 if (!delete && !backref->found_dir_index &&
2694                     backref->found_dir_item && backref->found_inode_ref) {
2695                         ret = add_missing_dir_index(root, inode_cache, rec,
2696                                                     backref);
2697                         if (ret)
2698                                 break;
2699                         repaired++;
2700                         if (backref->found_dir_item &&
2701                             backref->found_dir_index) {
2702                                 if (!backref->errors &&
2703                                     backref->found_inode_ref) {
2704                                         list_del(&backref->list);
2705                                         free(backref);
2706                                         continue;
2707                                 }
2708                         }
2709                 }
2710
2711                 if (!delete && (!backref->found_dir_index &&
2712                                 !backref->found_dir_item &&
2713                                 backref->found_inode_ref)) {
2714                         struct btrfs_trans_handle *trans;
2715                         struct btrfs_key location;
2716
2717                         ret = check_dir_conflict(root, backref->name,
2718                                                  backref->namelen,
2719                                                  backref->dir,
2720                                                  backref->index);
2721                         if (ret) {
2722                                 /*
2723                                  * let nlink fixing routine to handle it,
2724                                  * which can do it better.
2725                                  */
2726                                 ret = 0;
2727                                 break;
2728                         }
2729                         location.objectid = rec->ino;
2730                         location.type = BTRFS_INODE_ITEM_KEY;
2731                         location.offset = 0;
2732
2733                         trans = btrfs_start_transaction(root, 1);
2734                         if (IS_ERR(trans)) {
2735                                 ret = PTR_ERR(trans);
2736                                 break;
2737                         }
2738                         fprintf(stderr, "adding missing dir index/item pair "
2739                                 "for inode %llu\n",
2740                                 (unsigned long long)rec->ino);
2741                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2742                                                     backref->namelen,
2743                                                     backref->dir, &location,
2744                                                     imode_to_type(rec->imode),
2745                                                     backref->index);
2746                         BUG_ON(ret);
2747                         btrfs_commit_transaction(trans, root);
2748                         repaired++;
2749                 }
2750
2751                 if (!delete && (backref->found_inode_ref &&
2752                                 backref->found_dir_index &&
2753                                 backref->found_dir_item &&
2754                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2755                                 !rec->found_inode_item)) {
2756                         ret = create_inode_item(root, rec, 0);
2757                         if (ret)
2758                                 break;
2759                         repaired++;
2760                 }
2761
2762         }
2763         return ret ? ret : repaired;
2764 }
2765
2766 /*
2767  * To determine the file type for nlink/inode_item repair
2768  *
2769  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2770  * Return -ENOENT if file type is not found.
2771  */
2772 static int find_file_type(struct inode_record *rec, u8 *type)
2773 {
2774         struct inode_backref *backref;
2775
2776         /* For inode item recovered case */
2777         if (rec->found_inode_item) {
2778                 *type = imode_to_type(rec->imode);
2779                 return 0;
2780         }
2781
2782         list_for_each_entry(backref, &rec->backrefs, list) {
2783                 if (backref->found_dir_index || backref->found_dir_item) {
2784                         *type = backref->filetype;
2785                         return 0;
2786                 }
2787         }
2788         return -ENOENT;
2789 }
2790
2791 /*
2792  * To determine the file name for nlink repair
2793  *
2794  * Return 0 if file name is found, set name and namelen.
2795  * Return -ENOENT if file name is not found.
2796  */
2797 static int find_file_name(struct inode_record *rec,
2798                           char *name, int *namelen)
2799 {
2800         struct inode_backref *backref;
2801
2802         list_for_each_entry(backref, &rec->backrefs, list) {
2803                 if (backref->found_dir_index || backref->found_dir_item ||
2804                     backref->found_inode_ref) {
2805                         memcpy(name, backref->name, backref->namelen);
2806                         *namelen = backref->namelen;
2807                         return 0;
2808                 }
2809         }
2810         return -ENOENT;
2811 }
2812
2813 /* Reset the nlink of the inode to the correct one */
2814 static int reset_nlink(struct btrfs_trans_handle *trans,
2815                        struct btrfs_root *root,
2816                        struct btrfs_path *path,
2817                        struct inode_record *rec)
2818 {
2819         struct inode_backref *backref;
2820         struct inode_backref *tmp;
2821         struct btrfs_key key;
2822         struct btrfs_inode_item *inode_item;
2823         int ret = 0;
2824
2825         /* We don't believe this either, reset it and iterate backref */
2826         rec->found_link = 0;
2827
2828         /* Remove all backref including the valid ones */
2829         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2830                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2831                                    backref->index, backref->name,
2832                                    backref->namelen, 0);
2833                 if (ret < 0)
2834                         goto out;
2835
2836                 /* remove invalid backref, so it won't be added back */
2837                 if (!(backref->found_dir_index &&
2838                       backref->found_dir_item &&
2839                       backref->found_inode_ref)) {
2840                         list_del(&backref->list);
2841                         free(backref);
2842                 } else {
2843                         rec->found_link++;
2844                 }
2845         }
2846
2847         /* Set nlink to 0 */
2848         key.objectid = rec->ino;
2849         key.type = BTRFS_INODE_ITEM_KEY;
2850         key.offset = 0;
2851         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2852         if (ret < 0)
2853                 goto out;
2854         if (ret > 0) {
2855                 ret = -ENOENT;
2856                 goto out;
2857         }
2858         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2859                                     struct btrfs_inode_item);
2860         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2861         btrfs_mark_buffer_dirty(path->nodes[0]);
2862         btrfs_release_path(path);
2863
2864         /*
2865          * Add back valid inode_ref/dir_item/dir_index,
2866          * add_link() will handle the nlink inc, so new nlink must be correct
2867          */
2868         list_for_each_entry(backref, &rec->backrefs, list) {
2869                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2870                                      backref->name, backref->namelen,
2871                                      backref->filetype, &backref->index, 1);
2872                 if (ret < 0)
2873                         goto out;
2874         }
2875 out:
2876         btrfs_release_path(path);
2877         return ret;
2878 }
2879
2880 static int get_highest_inode(struct btrfs_trans_handle *trans,
2881                                 struct btrfs_root *root,
2882                                 struct btrfs_path *path,
2883                                 u64 *highest_ino)
2884 {
2885         struct btrfs_key key, found_key;
2886         int ret;
2887
2888         btrfs_init_path(path);
2889         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2890         key.offset = -1;
2891         key.type = BTRFS_INODE_ITEM_KEY;
2892         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2893         if (ret == 1) {
2894                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2895                                 path->slots[0] - 1);
2896                 *highest_ino = found_key.objectid;
2897                 ret = 0;
2898         }
2899         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2900                 ret = -EOVERFLOW;
2901         btrfs_release_path(path);
2902         return ret;
2903 }
2904
2905 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2906                                struct btrfs_root *root,
2907                                struct btrfs_path *path,
2908                                struct inode_record *rec)
2909 {
2910         char *dir_name = "lost+found";
2911         char namebuf[BTRFS_NAME_LEN] = {0};
2912         u64 lost_found_ino;
2913         u32 mode = 0700;
2914         u8 type = 0;
2915         int namelen = 0;
2916         int name_recovered = 0;
2917         int type_recovered = 0;
2918         int ret = 0;
2919
2920         /*
2921          * Get file name and type first before these invalid inode ref
2922          * are deleted by remove_all_invalid_backref()
2923          */
2924         name_recovered = !find_file_name(rec, namebuf, &namelen);
2925         type_recovered = !find_file_type(rec, &type);
2926
2927         if (!name_recovered) {
2928                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2929                        rec->ino, rec->ino);
2930                 namelen = count_digits(rec->ino);
2931                 sprintf(namebuf, "%llu", rec->ino);
2932                 name_recovered = 1;
2933         }
2934         if (!type_recovered) {
2935                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2936                        rec->ino);
2937                 type = BTRFS_FT_REG_FILE;
2938                 type_recovered = 1;
2939         }
2940
2941         ret = reset_nlink(trans, root, path, rec);
2942         if (ret < 0) {
2943                 fprintf(stderr,
2944                         "Failed to reset nlink for inode %llu: %s\n",
2945                         rec->ino, strerror(-ret));
2946                 goto out;
2947         }
2948
2949         if (rec->found_link == 0) {
2950                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2951                 if (ret < 0)
2952                         goto out;
2953                 lost_found_ino++;
2954                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2955                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2956                                   mode);
2957                 if (ret < 0) {
2958                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2959                                 dir_name, strerror(-ret));
2960                         goto out;
2961                 }
2962                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2963                                      namebuf, namelen, type, NULL, 1);
2964                 /*
2965                  * Add ".INO" suffix several times to handle case where
2966                  * "FILENAME.INO" is already taken by another file.
2967                  */
2968                 while (ret == -EEXIST) {
2969                         /*
2970                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2971                          */
2972                         if (namelen + count_digits(rec->ino) + 1 >
2973                             BTRFS_NAME_LEN) {
2974                                 ret = -EFBIG;
2975                                 goto out;
2976                         }
2977                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2978                                  ".%llu", rec->ino);
2979                         namelen += count_digits(rec->ino) + 1;
2980                         ret = btrfs_add_link(trans, root, rec->ino,
2981                                              lost_found_ino, namebuf,
2982                                              namelen, type, NULL, 1);
2983                 }
2984                 if (ret < 0) {
2985                         fprintf(stderr,
2986                                 "Failed to link the inode %llu to %s dir: %s\n",
2987                                 rec->ino, dir_name, strerror(-ret));
2988                         goto out;
2989                 }
2990                 /*
2991                  * Just increase the found_link, don't actually add the
2992                  * backref. This will make things easier and this inode
2993                  * record will be freed after the repair is done.
2994                  * So fsck will not report problem about this inode.
2995                  */
2996                 rec->found_link++;
2997                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2998                        namelen, namebuf, dir_name);
2999         }
3000         printf("Fixed the nlink of inode %llu\n", rec->ino);
3001 out:
3002         /*
3003          * Clear the flag anyway, or we will loop forever for the same inode
3004          * as it will not be removed from the bad inode list and the dead loop
3005          * happens.
3006          */
3007         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3008         btrfs_release_path(path);
3009         return ret;
3010 }
3011
3012 /*
3013  * Check if there is any normal(reg or prealloc) file extent for given
3014  * ino.
3015  * This is used to determine the file type when neither its dir_index/item or
3016  * inode_item exists.
3017  *
3018  * This will *NOT* report error, if any error happens, just consider it does
3019  * not have any normal file extent.
3020  */
3021 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3022 {
3023         struct btrfs_path path;
3024         struct btrfs_key key;
3025         struct btrfs_key found_key;
3026         struct btrfs_file_extent_item *fi;
3027         u8 type;
3028         int ret = 0;
3029
3030         btrfs_init_path(&path);
3031         key.objectid = ino;
3032         key.type = BTRFS_EXTENT_DATA_KEY;
3033         key.offset = 0;
3034
3035         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3036         if (ret < 0) {
3037                 ret = 0;
3038                 goto out;
3039         }
3040         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3041                 ret = btrfs_next_leaf(root, &path);
3042                 if (ret) {
3043                         ret = 0;
3044                         goto out;
3045                 }
3046         }
3047         while (1) {
3048                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3049                                       path.slots[0]);
3050                 if (found_key.objectid != ino ||
3051                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3052                         break;
3053                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3054                                     struct btrfs_file_extent_item);
3055                 type = btrfs_file_extent_type(path.nodes[0], fi);
3056                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3057                         ret = 1;
3058                         goto out;
3059                 }
3060         }
3061 out:
3062         btrfs_release_path(&path);
3063         return ret;
3064 }
3065
3066 static u32 btrfs_type_to_imode(u8 type)
3067 {
3068         static u32 imode_by_btrfs_type[] = {
3069                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3070                 [BTRFS_FT_DIR]          = S_IFDIR,
3071                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3072                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3073                 [BTRFS_FT_FIFO]         = S_IFIFO,
3074                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3075                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3076         };
3077
3078         return imode_by_btrfs_type[(type)];
3079 }
3080
3081 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3082                                 struct btrfs_root *root,
3083                                 struct btrfs_path *path,
3084                                 struct inode_record *rec)
3085 {
3086         u8 filetype;
3087         u32 mode = 0700;
3088         int type_recovered = 0;
3089         int ret = 0;
3090
3091         printf("Trying to rebuild inode:%llu\n", rec->ino);
3092
3093         type_recovered = !find_file_type(rec, &filetype);
3094
3095         /*
3096          * Try to determine inode type if type not found.
3097          *
3098          * For found regular file extent, it must be FILE.
3099          * For found dir_item/index, it must be DIR.
3100          *
3101          * For undetermined one, use FILE as fallback.
3102          *
3103          * TODO:
3104          * 1. If found backref(inode_index/item is already handled) to it,
3105          *    it must be DIR.
3106          *    Need new inode-inode ref structure to allow search for that.
3107          */
3108         if (!type_recovered) {
3109                 if (rec->found_file_extent &&
3110                     find_normal_file_extent(root, rec->ino)) {
3111                         type_recovered = 1;
3112                         filetype = BTRFS_FT_REG_FILE;
3113                 } else if (rec->found_dir_item) {
3114                         type_recovered = 1;
3115                         filetype = BTRFS_FT_DIR;
3116                 } else if (!list_empty(&rec->orphan_extents)) {
3117                         type_recovered = 1;
3118                         filetype = BTRFS_FT_REG_FILE;
3119                 } else{
3120                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3121                                rec->ino);
3122                         type_recovered = 1;
3123                         filetype = BTRFS_FT_REG_FILE;
3124                 }
3125         }
3126
3127         ret = btrfs_new_inode(trans, root, rec->ino,
3128                               mode | btrfs_type_to_imode(filetype));
3129         if (ret < 0)
3130                 goto out;
3131
3132         /*
3133          * Here inode rebuild is done, we only rebuild the inode item,
3134          * don't repair the nlink(like move to lost+found).
3135          * That is the job of nlink repair.
3136          *
3137          * We just fill the record and return
3138          */
3139         rec->found_dir_item = 1;
3140         rec->imode = mode | btrfs_type_to_imode(filetype);
3141         rec->nlink = 0;
3142         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3143         /* Ensure the inode_nlinks repair function will be called */
3144         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3145 out:
3146         return ret;
3147 }
3148
3149 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3150                                       struct btrfs_root *root,
3151                                       struct btrfs_path *path,
3152                                       struct inode_record *rec)
3153 {
3154         struct orphan_data_extent *orphan;
3155         struct orphan_data_extent *tmp;
3156         int ret = 0;
3157
3158         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3159                 /*
3160                  * Check for conflicting file extents
3161                  *
3162                  * Here we don't know whether the extents is compressed or not,
3163                  * so we can only assume it not compressed nor data offset,
3164                  * and use its disk_len as extent length.
3165                  */
3166                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3167                                        orphan->offset, orphan->disk_len, 0);
3168                 btrfs_release_path(path);
3169                 if (ret < 0)
3170                         goto out;
3171                 if (!ret) {
3172                         fprintf(stderr,
3173                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3174                                 orphan->disk_bytenr, orphan->disk_len);
3175                         ret = btrfs_free_extent(trans,
3176                                         root->fs_info->extent_root,
3177                                         orphan->disk_bytenr, orphan->disk_len,
3178                                         0, root->objectid, orphan->objectid,
3179                                         orphan->offset);
3180                         if (ret < 0)
3181                                 goto out;
3182                 }
3183                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3184                                 orphan->offset, orphan->disk_bytenr,
3185                                 orphan->disk_len, orphan->disk_len);
3186                 if (ret < 0)
3187                         goto out;
3188
3189                 /* Update file size info */
3190                 rec->found_size += orphan->disk_len;
3191                 if (rec->found_size == rec->nbytes)
3192                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3193
3194                 /* Update the file extent hole info too */
3195                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3196                                            orphan->disk_len);
3197                 if (ret < 0)
3198                         goto out;
3199                 if (RB_EMPTY_ROOT(&rec->holes))
3200                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3201
3202                 list_del(&orphan->list);
3203                 free(orphan);
3204         }
3205         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3206 out:
3207         return ret;
3208 }
3209
3210 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3211                                         struct btrfs_root *root,
3212                                         struct btrfs_path *path,
3213                                         struct inode_record *rec)
3214 {
3215         struct rb_node *node;
3216         struct file_extent_hole *hole;
3217         int found = 0;
3218         int ret = 0;
3219
3220         node = rb_first(&rec->holes);
3221
3222         while (node) {
3223                 found = 1;
3224                 hole = rb_entry(node, struct file_extent_hole, node);
3225                 ret = btrfs_punch_hole(trans, root, rec->ino,
3226                                        hole->start, hole->len);
3227                 if (ret < 0)
3228                         goto out;
3229                 ret = del_file_extent_hole(&rec->holes, hole->start,
3230                                            hole->len);
3231                 if (ret < 0)
3232                         goto out;
3233                 if (RB_EMPTY_ROOT(&rec->holes))
3234                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3235                 node = rb_first(&rec->holes);
3236         }
3237         /* special case for a file losing all its file extent */
3238         if (!found) {
3239                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3240                                        round_up(rec->isize,
3241                                                 root->fs_info->sectorsize));
3242                 if (ret < 0)
3243                         goto out;
3244         }
3245         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3246                rec->ino, root->objectid);
3247 out:
3248         return ret;
3249 }
3250
3251 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3252 {
3253         struct btrfs_trans_handle *trans;
3254         struct btrfs_path path;
3255         int ret = 0;
3256
3257         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3258                              I_ERR_NO_ORPHAN_ITEM |
3259                              I_ERR_LINK_COUNT_WRONG |
3260                              I_ERR_NO_INODE_ITEM |
3261                              I_ERR_FILE_EXTENT_ORPHAN |
3262                              I_ERR_FILE_EXTENT_DISCOUNT|
3263                              I_ERR_FILE_NBYTES_WRONG)))
3264                 return rec->errors;
3265
3266         /*
3267          * For nlink repair, it may create a dir and add link, so
3268          * 2 for parent(256)'s dir_index and dir_item
3269          * 2 for lost+found dir's inode_item and inode_ref
3270          * 1 for the new inode_ref of the file
3271          * 2 for lost+found dir's dir_index and dir_item for the file
3272          */
3273         trans = btrfs_start_transaction(root, 7);
3274         if (IS_ERR(trans))
3275                 return PTR_ERR(trans);
3276
3277         btrfs_init_path(&path);
3278         if (rec->errors & I_ERR_NO_INODE_ITEM)
3279                 ret = repair_inode_no_item(trans, root, &path, rec);
3280         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3281                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3282         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3283                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3284         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3285                 ret = repair_inode_isize(trans, root, &path, rec);
3286         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3287                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3288         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3289                 ret = repair_inode_nlinks(trans, root, &path, rec);
3290         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3291                 ret = repair_inode_nbytes(trans, root, &path, rec);
3292         btrfs_commit_transaction(trans, root);
3293         btrfs_release_path(&path);
3294         return ret;
3295 }
3296
3297 static int check_inode_recs(struct btrfs_root *root,
3298                             struct cache_tree *inode_cache)
3299 {
3300         struct cache_extent *cache;
3301         struct ptr_node *node;
3302         struct inode_record *rec;
3303         struct inode_backref *backref;
3304         int stage = 0;
3305         int ret = 0;
3306         int err = 0;
3307         u64 error = 0;
3308         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3309
3310         if (btrfs_root_refs(&root->root_item) == 0) {
3311                 if (!cache_tree_empty(inode_cache))
3312                         fprintf(stderr, "warning line %d\n", __LINE__);
3313                 return 0;
3314         }
3315
3316         /*
3317          * We need to repair backrefs first because we could change some of the
3318          * errors in the inode recs.
3319          *
3320          * We also need to go through and delete invalid backrefs first and then
3321          * add the correct ones second.  We do this because we may get EEXIST
3322          * when adding back the correct index because we hadn't yet deleted the
3323          * invalid index.
3324          *
3325          * For example, if we were missing a dir index then the directories
3326          * isize would be wrong, so if we fixed the isize to what we thought it
3327          * would be and then fixed the backref we'd still have a invalid fs, so
3328          * we need to add back the dir index and then check to see if the isize
3329          * is still wrong.
3330          */
3331         while (stage < 3) {
3332                 stage++;
3333                 if (stage == 3 && !err)
3334                         break;
3335
3336                 cache = search_cache_extent(inode_cache, 0);
3337                 while (repair && cache) {
3338                         node = container_of(cache, struct ptr_node, cache);
3339                         rec = node->data;
3340                         cache = next_cache_extent(cache);
3341
3342                         /* Need to free everything up and rescan */
3343                         if (stage == 3) {
3344                                 remove_cache_extent(inode_cache, &node->cache);
3345                                 free(node);
3346                                 free_inode_rec(rec);
3347                                 continue;
3348                         }
3349
3350                         if (list_empty(&rec->backrefs))
3351                                 continue;
3352
3353                         ret = repair_inode_backrefs(root, rec, inode_cache,
3354                                                     stage == 1);
3355                         if (ret < 0) {
3356                                 err = ret;
3357                                 stage = 2;
3358                                 break;
3359                         } if (ret > 0) {
3360                                 err = -EAGAIN;
3361                         }
3362                 }
3363         }
3364         if (err)
3365                 return err;
3366
3367         rec = get_inode_rec(inode_cache, root_dirid, 0);
3368         BUG_ON(IS_ERR(rec));
3369         if (rec) {
3370                 ret = check_root_dir(rec);
3371                 if (ret) {
3372                         fprintf(stderr, "root %llu root dir %llu error\n",
3373                                 (unsigned long long)root->root_key.objectid,
3374                                 (unsigned long long)root_dirid);
3375                         print_inode_error(root, rec);
3376                         error++;
3377                 }
3378         } else {
3379                 if (repair) {
3380                         struct btrfs_trans_handle *trans;
3381
3382                         trans = btrfs_start_transaction(root, 1);
3383                         if (IS_ERR(trans)) {
3384                                 err = PTR_ERR(trans);
3385                                 return err;
3386                         }
3387
3388                         fprintf(stderr,
3389                                 "root %llu missing its root dir, recreating\n",
3390                                 (unsigned long long)root->objectid);
3391
3392                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3393                         BUG_ON(ret);
3394
3395                         btrfs_commit_transaction(trans, root);
3396                         return -EAGAIN;
3397                 }
3398
3399                 fprintf(stderr, "root %llu root dir %llu not found\n",
3400                         (unsigned long long)root->root_key.objectid,
3401                         (unsigned long long)root_dirid);
3402         }
3403
3404         while (1) {
3405                 cache = search_cache_extent(inode_cache, 0);
3406                 if (!cache)
3407                         break;
3408                 node = container_of(cache, struct ptr_node, cache);
3409                 rec = node->data;
3410                 remove_cache_extent(inode_cache, &node->cache);
3411                 free(node);
3412                 if (rec->ino == root_dirid ||
3413                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3414                         free_inode_rec(rec);
3415                         continue;
3416                 }
3417
3418                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3419                         ret = check_orphan_item(root, rec->ino);
3420                         if (ret == 0)
3421                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3422                         if (can_free_inode_rec(rec)) {
3423                                 free_inode_rec(rec);
3424                                 continue;
3425                         }
3426                 }
3427
3428                 if (!rec->found_inode_item)
3429                         rec->errors |= I_ERR_NO_INODE_ITEM;
3430                 if (rec->found_link != rec->nlink)
3431                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3432                 if (repair) {
3433                         ret = try_repair_inode(root, rec);
3434                         if (ret == 0 && can_free_inode_rec(rec)) {
3435                                 free_inode_rec(rec);
3436                                 continue;
3437                         }
3438                         ret = 0;
3439                 }
3440
3441                 if (!(repair && ret == 0))
3442                         error++;
3443                 print_inode_error(root, rec);
3444                 list_for_each_entry(backref, &rec->backrefs, list) {
3445                         if (!backref->found_dir_item)
3446                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3447                         if (!backref->found_dir_index)
3448                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3449                         if (!backref->found_inode_ref)
3450                                 backref->errors |= REF_ERR_NO_INODE_REF;
3451                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3452                                 " namelen %u name %s filetype %d errors %x",
3453                                 (unsigned long long)backref->dir,
3454                                 (unsigned long long)backref->index,
3455                                 backref->namelen, backref->name,
3456                                 backref->filetype, backref->errors);
3457                         print_ref_error(backref->errors);
3458                 }
3459                 free_inode_rec(rec);
3460         }
3461         return (error > 0) ? -1 : 0;
3462 }
3463
3464 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3465                                         u64 objectid)
3466 {
3467         struct cache_extent *cache;
3468         struct root_record *rec = NULL;
3469         int ret;
3470
3471         cache = lookup_cache_extent(root_cache, objectid, 1);
3472         if (cache) {
3473                 rec = container_of(cache, struct root_record, cache);
3474         } else {
3475                 rec = calloc(1, sizeof(*rec));
3476                 if (!rec)
3477                         return ERR_PTR(-ENOMEM);
3478                 rec->objectid = objectid;
3479                 INIT_LIST_HEAD(&rec->backrefs);
3480                 rec->cache.start = objectid;
3481                 rec->cache.size = 1;
3482
3483                 ret = insert_cache_extent(root_cache, &rec->cache);
3484                 if (ret)
3485                         return ERR_PTR(-EEXIST);
3486         }
3487         return rec;
3488 }
3489
3490 static struct root_backref *get_root_backref(struct root_record *rec,
3491                                              u64 ref_root, u64 dir, u64 index,
3492                                              const char *name, int namelen)
3493 {
3494         struct root_backref *backref;
3495
3496         list_for_each_entry(backref, &rec->backrefs, list) {
3497                 if (backref->ref_root != ref_root || backref->dir != dir ||
3498                     backref->namelen != namelen)
3499                         continue;
3500                 if (memcmp(name, backref->name, namelen))
3501                         continue;
3502                 return backref;
3503         }
3504
3505         backref = calloc(1, sizeof(*backref) + namelen + 1);
3506         if (!backref)
3507                 return NULL;
3508         backref->ref_root = ref_root;
3509         backref->dir = dir;
3510         backref->index = index;
3511         backref->namelen = namelen;
3512         memcpy(backref->name, name, namelen);
3513         backref->name[namelen] = '\0';
3514         list_add_tail(&backref->list, &rec->backrefs);
3515         return backref;
3516 }
3517
3518 static void free_root_record(struct cache_extent *cache)
3519 {
3520         struct root_record *rec;
3521         struct root_backref *backref;
3522
3523         rec = container_of(cache, struct root_record, cache);
3524         while (!list_empty(&rec->backrefs)) {
3525                 backref = to_root_backref(rec->backrefs.next);
3526                 list_del(&backref->list);
3527                 free(backref);
3528         }
3529
3530         free(rec);
3531 }
3532
3533 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3534
3535 static int add_root_backref(struct cache_tree *root_cache,
3536                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3537                             const char *name, int namelen,
3538                             int item_type, int errors)
3539 {
3540         struct root_record *rec;
3541         struct root_backref *backref;
3542
3543         rec = get_root_rec(root_cache, root_id);
3544         BUG_ON(IS_ERR(rec));
3545         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3546         BUG_ON(!backref);
3547
3548         backref->errors |= errors;
3549
3550         if (item_type != BTRFS_DIR_ITEM_KEY) {
3551                 if (backref->found_dir_index || backref->found_back_ref ||
3552                     backref->found_forward_ref) {
3553                         if (backref->index != index)
3554                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3555                 } else {
3556                         backref->index = index;
3557                 }
3558         }
3559
3560         if (item_type == BTRFS_DIR_ITEM_KEY) {
3561                 if (backref->found_forward_ref)
3562                         rec->found_ref++;
3563                 backref->found_dir_item = 1;
3564         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3565                 backref->found_dir_index = 1;
3566         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3567                 if (backref->found_forward_ref)
3568                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3569                 else if (backref->found_dir_item)
3570                         rec->found_ref++;
3571                 backref->found_forward_ref = 1;
3572         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3573                 if (backref->found_back_ref)
3574                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3575                 backref->found_back_ref = 1;
3576         } else {
3577                 BUG_ON(1);
3578         }
3579
3580         if (backref->found_forward_ref && backref->found_dir_item)
3581                 backref->reachable = 1;
3582         return 0;
3583 }
3584
3585 static int merge_root_recs(struct btrfs_root *root,
3586                            struct cache_tree *src_cache,
3587                            struct cache_tree *dst_cache)
3588 {
3589         struct cache_extent *cache;
3590         struct ptr_node *node;
3591         struct inode_record *rec;
3592         struct inode_backref *backref;
3593         int ret = 0;
3594
3595         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3596                 free_inode_recs_tree(src_cache);
3597                 return 0;
3598         }
3599
3600         while (1) {
3601                 cache = search_cache_extent(src_cache, 0);
3602                 if (!cache)
3603                         break;
3604                 node = container_of(cache, struct ptr_node, cache);
3605                 rec = node->data;
3606                 remove_cache_extent(src_cache, &node->cache);
3607                 free(node);
3608
3609                 ret = is_child_root(root, root->objectid, rec->ino);
3610                 if (ret < 0)
3611                         break;
3612                 else if (ret == 0)
3613                         goto skip;
3614
3615                 list_for_each_entry(backref, &rec->backrefs, list) {
3616                         BUG_ON(backref->found_inode_ref);
3617                         if (backref->found_dir_item)
3618                                 add_root_backref(dst_cache, rec->ino,
3619                                         root->root_key.objectid, backref->dir,
3620                                         backref->index, backref->name,
3621                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3622                                         backref->errors);
3623                         if (backref->found_dir_index)
3624                                 add_root_backref(dst_cache, rec->ino,
3625                                         root->root_key.objectid, backref->dir,
3626                                         backref->index, backref->name,
3627                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3628                                         backref->errors);
3629                 }
3630 skip:
3631                 free_inode_rec(rec);
3632         }
3633         if (ret < 0)
3634                 return ret;
3635         return 0;
3636 }
3637
3638 static int check_root_refs(struct btrfs_root *root,
3639                            struct cache_tree *root_cache)
3640 {
3641         struct root_record *rec;
3642         struct root_record *ref_root;
3643         struct root_backref *backref;
3644         struct cache_extent *cache;
3645         int loop = 1;
3646         int ret;
3647         int error;
3648         int errors = 0;
3649
3650         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3651         BUG_ON(IS_ERR(rec));
3652         rec->found_ref = 1;
3653
3654         /* fixme: this can not detect circular references */
3655         while (loop) {
3656                 loop = 0;
3657                 cache = search_cache_extent(root_cache, 0);
3658                 while (1) {
3659                         if (!cache)
3660                                 break;
3661                         rec = container_of(cache, struct root_record, cache);
3662                         cache = next_cache_extent(cache);
3663
3664                         if (rec->found_ref == 0)
3665                                 continue;
3666
3667                         list_for_each_entry(backref, &rec->backrefs, list) {
3668                                 if (!backref->reachable)
3669                                         continue;
3670
3671                                 ref_root = get_root_rec(root_cache,
3672                                                         backref->ref_root);
3673                                 BUG_ON(IS_ERR(ref_root));
3674                                 if (ref_root->found_ref > 0)
3675                                         continue;
3676
3677                                 backref->reachable = 0;
3678                                 rec->found_ref--;
3679                                 if (rec->found_ref == 0)
3680                                         loop = 1;
3681                         }
3682                 }
3683         }
3684
3685         cache = search_cache_extent(root_cache, 0);
3686         while (1) {
3687                 if (!cache)
3688                         break;
3689                 rec = container_of(cache, struct root_record, cache);
3690                 cache = next_cache_extent(cache);
3691
3692                 if (rec->found_ref == 0 &&
3693                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3694                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3695                         ret = check_orphan_item(root->fs_info->tree_root,
3696                                                 rec->objectid);
3697                         if (ret == 0)
3698                                 continue;
3699
3700                         /*
3701                          * If we don't have a root item then we likely just have
3702                          * a dir item in a snapshot for this root but no actual
3703                          * ref key or anything so it's meaningless.
3704                          */
3705                         if (!rec->found_root_item)
3706                                 continue;
3707                         errors++;
3708                         fprintf(stderr, "fs tree %llu not referenced\n",
3709                                 (unsigned long long)rec->objectid);
3710                 }
3711
3712                 error = 0;
3713                 if (rec->found_ref > 0 && !rec->found_root_item)
3714                         error = 1;
3715                 list_for_each_entry(backref, &rec->backrefs, list) {
3716                         if (!backref->found_dir_item)
3717                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3718                         if (!backref->found_dir_index)
3719                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3720                         if (!backref->found_back_ref)
3721                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3722                         if (!backref->found_forward_ref)
3723                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3724                         if (backref->reachable && backref->errors)
3725                                 error = 1;
3726                 }
3727                 if (!error)
3728                         continue;
3729
3730                 errors++;
3731                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3732                         (unsigned long long)rec->objectid, rec->found_ref,
3733                          rec->found_root_item ? "" : "not found");
3734
3735                 list_for_each_entry(backref, &rec->backrefs, list) {
3736                         if (!backref->reachable)
3737                                 continue;
3738                         if (!backref->errors && rec->found_root_item)
3739                                 continue;
3740                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3741                                 " index %llu namelen %u name %s errors %x\n",
3742                                 (unsigned long long)backref->ref_root,
3743                                 (unsigned long long)backref->dir,
3744                                 (unsigned long long)backref->index,
3745                                 backref->namelen, backref->name,
3746                                 backref->errors);
3747                         print_ref_error(backref->errors);
3748                 }
3749         }
3750         return errors > 0 ? 1 : 0;
3751 }
3752
3753 static int process_root_ref(struct extent_buffer *eb, int slot,
3754                             struct btrfs_key *key,
3755                             struct cache_tree *root_cache)
3756 {
3757         u64 dirid;
3758         u64 index;
3759         u32 len;
3760         u32 name_len;
3761         struct btrfs_root_ref *ref;
3762         char namebuf[BTRFS_NAME_LEN];
3763         int error;
3764
3765         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3766
3767         dirid = btrfs_root_ref_dirid(eb, ref);
3768         index = btrfs_root_ref_sequence(eb, ref);
3769         name_len = btrfs_root_ref_name_len(eb, ref);
3770
3771         if (name_len <= BTRFS_NAME_LEN) {
3772                 len = name_len;
3773                 error = 0;
3774         } else {
3775                 len = BTRFS_NAME_LEN;
3776                 error = REF_ERR_NAME_TOO_LONG;
3777         }
3778         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3779
3780         if (key->type == BTRFS_ROOT_REF_KEY) {
3781                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3782                                  index, namebuf, len, key->type, error);
3783         } else {
3784                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3785                                  index, namebuf, len, key->type, error);
3786         }
3787         return 0;
3788 }
3789
3790 static void free_corrupt_block(struct cache_extent *cache)
3791 {
3792         struct btrfs_corrupt_block *corrupt;
3793
3794         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3795         free(corrupt);
3796 }
3797
3798 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3799
3800 /*
3801  * Repair the btree of the given root.
3802  *
3803  * The fix is to remove the node key in corrupt_blocks cache_tree.
3804  * and rebalance the tree.
3805  * After the fix, the btree should be writeable.
3806  */
3807 static int repair_btree(struct btrfs_root *root,
3808                         struct cache_tree *corrupt_blocks)
3809 {
3810         struct btrfs_trans_handle *trans;
3811         struct btrfs_path path;
3812         struct btrfs_corrupt_block *corrupt;
3813         struct cache_extent *cache;
3814         struct btrfs_key key;
3815         u64 offset;
3816         int level;
3817         int ret = 0;
3818
3819         if (cache_tree_empty(corrupt_blocks))
3820                 return 0;
3821
3822         trans = btrfs_start_transaction(root, 1);
3823         if (IS_ERR(trans)) {
3824                 ret = PTR_ERR(trans);
3825                 fprintf(stderr, "Error starting transaction: %s\n",
3826                         strerror(-ret));
3827                 return ret;
3828         }
3829         btrfs_init_path(&path);
3830         cache = first_cache_extent(corrupt_blocks);
3831         while (cache) {
3832                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3833                                        cache);
3834                 level = corrupt->level;
3835                 path.lowest_level = level;
3836                 key.objectid = corrupt->key.objectid;
3837                 key.type = corrupt->key.type;
3838                 key.offset = corrupt->key.offset;
3839
3840                 /*
3841                  * Here we don't want to do any tree balance, since it may
3842                  * cause a balance with corrupted brother leaf/node,
3843                  * so ins_len set to 0 here.
3844                  * Balance will be done after all corrupt node/leaf is deleted.
3845                  */
3846                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3847                 if (ret < 0)
3848                         goto out;
3849                 offset = btrfs_node_blockptr(path.nodes[level],
3850                                              path.slots[level]);
3851
3852                 /* Remove the ptr */
3853                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3854                 if (ret < 0)
3855                         goto out;
3856                 /*
3857                  * Remove the corresponding extent
3858                  * return value is not concerned.
3859                  */
3860                 btrfs_release_path(&path);
3861                 ret = btrfs_free_extent(trans, root, offset,
3862                                 root->fs_info->nodesize, 0,
3863                                 root->root_key.objectid, level - 1, 0);
3864                 cache = next_cache_extent(cache);
3865         }
3866
3867         /* Balance the btree using btrfs_search_slot() */
3868         cache = first_cache_extent(corrupt_blocks);
3869         while (cache) {
3870                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3871                                        cache);
3872                 memcpy(&key, &corrupt->key, sizeof(key));
3873                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3874                 if (ret < 0)
3875                         goto out;
3876                 /* return will always >0 since it won't find the item */
3877                 ret = 0;
3878                 btrfs_release_path(&path);
3879                 cache = next_cache_extent(cache);
3880         }
3881 out:
3882         btrfs_commit_transaction(trans, root);
3883         btrfs_release_path(&path);
3884         return ret;
3885 }
3886
3887 static int check_fs_root(struct btrfs_root *root,
3888                          struct cache_tree *root_cache,
3889                          struct walk_control *wc)
3890 {
3891         int ret = 0;
3892         int err = 0;
3893         int wret;
3894         int level;
3895         struct btrfs_path path;
3896         struct shared_node root_node;
3897         struct root_record *rec;
3898         struct btrfs_root_item *root_item = &root->root_item;
3899         struct cache_tree corrupt_blocks;
3900         struct orphan_data_extent *orphan;
3901         struct orphan_data_extent *tmp;
3902         enum btrfs_tree_block_status status;
3903         struct node_refs nrefs;
3904
3905         /*
3906          * Reuse the corrupt_block cache tree to record corrupted tree block
3907          *
3908          * Unlike the usage in extent tree check, here we do it in a per
3909          * fs/subvol tree base.
3910          */
3911         cache_tree_init(&corrupt_blocks);
3912         root->fs_info->corrupt_blocks = &corrupt_blocks;
3913
3914         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3915                 rec = get_root_rec(root_cache, root->root_key.objectid);
3916                 BUG_ON(IS_ERR(rec));
3917                 if (btrfs_root_refs(root_item) > 0)
3918                         rec->found_root_item = 1;
3919         }
3920
3921         btrfs_init_path(&path);
3922         memset(&root_node, 0, sizeof(root_node));
3923         cache_tree_init(&root_node.root_cache);
3924         cache_tree_init(&root_node.inode_cache);
3925         memset(&nrefs, 0, sizeof(nrefs));
3926
3927         /* Move the orphan extent record to corresponding inode_record */
3928         list_for_each_entry_safe(orphan, tmp,
3929                                  &root->orphan_data_extents, list) {
3930                 struct inode_record *inode;
3931
3932                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3933                                       1);
3934                 BUG_ON(IS_ERR(inode));
3935                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3936                 list_move(&orphan->list, &inode->orphan_extents);
3937         }
3938
3939         level = btrfs_header_level(root->node);
3940         memset(wc->nodes, 0, sizeof(wc->nodes));
3941         wc->nodes[level] = &root_node;
3942         wc->active_node = level;
3943         wc->root_level = level;
3944
3945         /* We may not have checked the root block, lets do that now */
3946         if (btrfs_is_leaf(root->node))
3947                 status = btrfs_check_leaf(root, NULL, root->node);
3948         else
3949                 status = btrfs_check_node(root, NULL, root->node);
3950         if (status != BTRFS_TREE_BLOCK_CLEAN)
3951                 return -EIO;
3952
3953         if (btrfs_root_refs(root_item) > 0 ||
3954             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3955                 path.nodes[level] = root->node;
3956                 extent_buffer_get(root->node);
3957                 path.slots[level] = 0;
3958         } else {
3959                 struct btrfs_key key;
3960                 struct btrfs_disk_key found_key;
3961
3962                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3963                 level = root_item->drop_level;
3964                 path.lowest_level = level;
3965                 if (level > btrfs_header_level(root->node) ||
3966                     level >= BTRFS_MAX_LEVEL) {
3967                         error("ignoring invalid drop level: %u", level);
3968                         goto skip_walking;
3969                 }
3970                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3971                 if (wret < 0)
3972                         goto skip_walking;
3973                 btrfs_node_key(path.nodes[level], &found_key,
3974                                 path.slots[level]);
3975                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3976                                         sizeof(found_key)));
3977         }
3978
3979         while (1) {
3980                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3981                 if (wret < 0)
3982                         ret = wret;
3983                 if (wret != 0)
3984                         break;
3985
3986                 wret = walk_up_tree(root, &path, wc, &level);
3987                 if (wret < 0)
3988                         ret = wret;
3989                 if (wret != 0)
3990                         break;
3991         }
3992 skip_walking:
3993         btrfs_release_path(&path);
3994
3995         if (!cache_tree_empty(&corrupt_blocks)) {
3996                 struct cache_extent *cache;
3997                 struct btrfs_corrupt_block *corrupt;
3998
3999                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4000                        root->root_key.objectid);
4001                 cache = first_cache_extent(&corrupt_blocks);
4002                 while (cache) {
4003                         corrupt = container_of(cache,
4004                                                struct btrfs_corrupt_block,
4005                                                cache);
4006                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4007                                cache->start, corrupt->level,
4008                                corrupt->key.objectid, corrupt->key.type,
4009                                corrupt->key.offset);
4010                         cache = next_cache_extent(cache);
4011                 }
4012                 if (repair) {
4013                         printf("Try to repair the btree for root %llu\n",
4014                                root->root_key.objectid);
4015                         ret = repair_btree(root, &corrupt_blocks);
4016                         if (ret < 0)
4017                                 fprintf(stderr, "Failed to repair btree: %s\n",
4018                                         strerror(-ret));
4019                         if (!ret)
4020                                 printf("Btree for root %llu is fixed\n",
4021                                        root->root_key.objectid);
4022                 }
4023         }
4024
4025         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4026         if (err < 0)
4027                 ret = err;
4028
4029         if (root_node.current) {
4030                 root_node.current->checked = 1;
4031                 maybe_free_inode_rec(&root_node.inode_cache,
4032                                 root_node.current);
4033         }
4034
4035         err = check_inode_recs(root, &root_node.inode_cache);
4036         if (!ret)
4037                 ret = err;
4038
4039         free_corrupt_blocks_tree(&corrupt_blocks);
4040         root->fs_info->corrupt_blocks = NULL;
4041         free_orphan_data_extents(&root->orphan_data_extents);
4042         return ret;
4043 }
4044
4045 static int fs_root_objectid(u64 objectid)
4046 {
4047         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4048             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4049                 return 1;
4050         return is_fstree(objectid);
4051 }
4052
4053 static int check_fs_roots(struct btrfs_root *root,
4054                           struct cache_tree *root_cache)
4055 {
4056         struct btrfs_path path;
4057         struct btrfs_key key;
4058         struct walk_control wc;
4059         struct extent_buffer *leaf, *tree_node;
4060         struct btrfs_root *tmp_root;
4061         struct btrfs_root *tree_root = root->fs_info->tree_root;
4062         int ret;
4063         int err = 0;
4064
4065         if (ctx.progress_enabled) {
4066                 ctx.tp = TASK_FS_ROOTS;
4067                 task_start(ctx.info);
4068         }
4069
4070         /*
4071          * Just in case we made any changes to the extent tree that weren't
4072          * reflected into the free space cache yet.
4073          */
4074         if (repair)
4075                 reset_cached_block_groups(root->fs_info);
4076         memset(&wc, 0, sizeof(wc));
4077         cache_tree_init(&wc.shared);
4078         btrfs_init_path(&path);
4079
4080 again:
4081         key.offset = 0;
4082         key.objectid = 0;
4083         key.type = BTRFS_ROOT_ITEM_KEY;
4084         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4085         if (ret < 0) {
4086                 err = 1;
4087                 goto out;
4088         }
4089         tree_node = tree_root->node;
4090         while (1) {
4091                 if (tree_node != tree_root->node) {
4092                         free_root_recs_tree(root_cache);
4093                         btrfs_release_path(&path);
4094                         goto again;
4095                 }
4096                 leaf = path.nodes[0];
4097                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4098                         ret = btrfs_next_leaf(tree_root, &path);
4099                         if (ret) {
4100                                 if (ret < 0)
4101                                         err = 1;
4102                                 break;
4103                         }
4104                         leaf = path.nodes[0];
4105                 }
4106                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4107                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4108                     fs_root_objectid(key.objectid)) {
4109                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4110                                 tmp_root = btrfs_read_fs_root_no_cache(
4111                                                 root->fs_info, &key);
4112                         } else {
4113                                 key.offset = (u64)-1;
4114                                 tmp_root = btrfs_read_fs_root(
4115                                                 root->fs_info, &key);
4116                         }
4117                         if (IS_ERR(tmp_root)) {
4118                                 err = 1;
4119                                 goto next;
4120                         }
4121                         ret = check_fs_root(tmp_root, root_cache, &wc);
4122                         if (ret == -EAGAIN) {
4123                                 free_root_recs_tree(root_cache);
4124                                 btrfs_release_path(&path);
4125                                 goto again;
4126                         }
4127                         if (ret)
4128                                 err = 1;
4129                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4130                                 btrfs_free_fs_root(tmp_root);
4131                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4132                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4133                         process_root_ref(leaf, path.slots[0], &key,
4134                                          root_cache);
4135                 }
4136 next:
4137                 path.slots[0]++;
4138         }
4139 out:
4140         btrfs_release_path(&path);
4141         if (err)
4142                 free_extent_cache_tree(&wc.shared);
4143         if (!cache_tree_empty(&wc.shared))
4144                 fprintf(stderr, "warning line %d\n", __LINE__);
4145
4146         task_stop(ctx.info);
4147
4148         return err;
4149 }
4150
4151 /*
4152  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4153  * INODE_REF/INODE_EXTREF match.
4154  *
4155  * @root:       the root of the fs/file tree
4156  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4157  * @key:        the key of the DIR_ITEM/DIR_INDEX
4158  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4159  *              distinguish root_dir between normal dir/file
4160  * @name:       the name in the INODE_REF/INODE_EXTREF
4161  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4162  * @mode:       the st_mode of INODE_ITEM
4163  *
4164  * Return 0 if no error occurred.
4165  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4166  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4167  * dir/file.
4168  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4169  * not match for normal dir/file.
4170  */
4171 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4172                          struct btrfs_key *key, u64 index, char *name,
4173                          u32 namelen, u32 mode)
4174 {
4175         struct btrfs_path path;
4176         struct extent_buffer *node;
4177         struct btrfs_dir_item *di;
4178         struct btrfs_key location;
4179         char namebuf[BTRFS_NAME_LEN] = {0};
4180         u32 total;
4181         u32 cur = 0;
4182         u32 len;
4183         u32 name_len;
4184         u32 data_len;
4185         u8 filetype;
4186         int slot;
4187         int ret;
4188
4189         btrfs_init_path(&path);
4190         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4191         if (ret < 0) {
4192                 ret = DIR_ITEM_MISSING;
4193                 goto out;
4194         }
4195
4196         /* Process root dir and goto out*/
4197         if (index == 0) {
4198                 if (ret == 0) {
4199                         ret = ROOT_DIR_ERROR;
4200                         error(
4201                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4202                                 root->objectid,
4203                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4204                                         "REF" : "EXTREF",
4205                                 ref_key->objectid, ref_key->offset,
4206                                 key->type == BTRFS_DIR_ITEM_KEY ?
4207                                         "DIR_ITEM" : "DIR_INDEX");
4208                 } else {
4209                         ret = 0;
4210                 }
4211
4212                 goto out;
4213         }
4214
4215         /* Process normal file/dir */
4216         if (ret > 0) {
4217                 ret = DIR_ITEM_MISSING;
4218                 error(
4219                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4220                         root->objectid,
4221                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4222                         ref_key->objectid, ref_key->offset,
4223                         key->type == BTRFS_DIR_ITEM_KEY ?
4224                                 "DIR_ITEM" : "DIR_INDEX",
4225                         key->objectid, key->offset, namelen, name,
4226                         imode_to_type(mode));
4227                 goto out;
4228         }
4229
4230         /* Check whether inode_id/filetype/name match */
4231         node = path.nodes[0];
4232         slot = path.slots[0];
4233         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4234         total = btrfs_item_size_nr(node, slot);
4235         while (cur < total) {
4236                 ret = DIR_ITEM_MISMATCH;
4237                 name_len = btrfs_dir_name_len(node, di);
4238                 data_len = btrfs_dir_data_len(node, di);
4239
4240                 btrfs_dir_item_key_to_cpu(node, di, &location);
4241                 if (location.objectid != ref_key->objectid ||
4242                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4243                     location.offset != 0)
4244                         goto next;
4245
4246                 filetype = btrfs_dir_type(node, di);
4247                 if (imode_to_type(mode) != filetype)
4248                         goto next;
4249
4250                 if (cur + sizeof(*di) + name_len > total ||
4251                     name_len > BTRFS_NAME_LEN) {
4252                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4253                                 root->objectid,
4254                                 key->type == BTRFS_DIR_ITEM_KEY ?
4255                                 "DIR_ITEM" : "DIR_INDEX",
4256                                 key->objectid, key->offset, name_len);
4257
4258                         if (cur + sizeof(*di) > total)
4259                                 break;
4260                         len = min_t(u32, total - cur - sizeof(*di),
4261                                     BTRFS_NAME_LEN);
4262                 } else {
4263                         len = name_len;
4264                 }
4265
4266                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4267                 if (len != namelen || strncmp(namebuf, name, len))
4268                         goto next;
4269
4270                 ret = 0;
4271                 goto out;
4272 next:
4273                 len = sizeof(*di) + name_len + data_len;
4274                 di = (struct btrfs_dir_item *)((char *)di + len);
4275                 cur += len;
4276         }
4277         if (ret == DIR_ITEM_MISMATCH)
4278                 error(
4279                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4280                         root->objectid,
4281                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4282                         ref_key->objectid, ref_key->offset,
4283                         key->type == BTRFS_DIR_ITEM_KEY ?
4284                                 "DIR_ITEM" : "DIR_INDEX",
4285                         key->objectid, key->offset, namelen, name,
4286                         imode_to_type(mode));
4287 out:
4288         btrfs_release_path(&path);
4289         return ret;
4290 }
4291
4292 /*
4293  * Traverse the given INODE_REF and call find_dir_item() to find related
4294  * DIR_ITEM/DIR_INDEX.
4295  *
4296  * @root:       the root of the fs/file tree
4297  * @ref_key:    the key of the INODE_REF
4298  * @refs:       the count of INODE_REF
4299  * @mode:       the st_mode of INODE_ITEM
4300  *
4301  * Return 0 if no error occurred.
4302  */
4303 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4304                            struct extent_buffer *node, int slot, u64 *refs,
4305                            int mode)
4306 {
4307         struct btrfs_key key;
4308         struct btrfs_inode_ref *ref;
4309         char namebuf[BTRFS_NAME_LEN] = {0};
4310         u32 total;
4311         u32 cur = 0;
4312         u32 len;
4313         u32 name_len;
4314         u64 index;
4315         int ret, err = 0;
4316
4317         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4318         total = btrfs_item_size_nr(node, slot);
4319
4320 next:
4321         /* Update inode ref count */
4322         (*refs)++;
4323
4324         index = btrfs_inode_ref_index(node, ref);
4325         name_len = btrfs_inode_ref_name_len(node, ref);
4326         if (cur + sizeof(*ref) + name_len > total ||
4327             name_len > BTRFS_NAME_LEN) {
4328                 warning("root %llu INODE_REF[%llu %llu] name too long",
4329                         root->objectid, ref_key->objectid, ref_key->offset);
4330
4331                 if (total < cur + sizeof(*ref))
4332                         goto out;
4333                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4334         } else {
4335                 len = name_len;
4336         }
4337
4338         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4339
4340         /* Check root dir ref name */
4341         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4342                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4343                       root->objectid, ref_key->objectid, ref_key->offset,
4344                       namebuf);
4345                 err |= ROOT_DIR_ERROR;
4346         }
4347
4348         /* Find related DIR_INDEX */
4349         key.objectid = ref_key->offset;
4350         key.type = BTRFS_DIR_INDEX_KEY;
4351         key.offset = index;
4352         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4353         err |= ret;
4354
4355         /* Find related dir_item */
4356         key.objectid = ref_key->offset;
4357         key.type = BTRFS_DIR_ITEM_KEY;
4358         key.offset = btrfs_name_hash(namebuf, len);
4359         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4360         err |= ret;
4361
4362         len = sizeof(*ref) + name_len;
4363         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4364         cur += len;
4365         if (cur < total)
4366                 goto next;
4367
4368 out:
4369         return err;
4370 }
4371
4372 /*
4373  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4374  * DIR_ITEM/DIR_INDEX.
4375  *
4376  * @root:       the root of the fs/file tree
4377  * @ref_key:    the key of the INODE_EXTREF
4378  * @refs:       the count of INODE_EXTREF
4379  * @mode:       the st_mode of INODE_ITEM
4380  *
4381  * Return 0 if no error occurred.
4382  */
4383 static int check_inode_extref(struct btrfs_root *root,
4384                               struct btrfs_key *ref_key,
4385                               struct extent_buffer *node, int slot, u64 *refs,
4386                               int mode)
4387 {
4388         struct btrfs_key key;
4389         struct btrfs_inode_extref *extref;
4390         char namebuf[BTRFS_NAME_LEN] = {0};
4391         u32 total;
4392         u32 cur = 0;
4393         u32 len;
4394         u32 name_len;
4395         u64 index;
4396         u64 parent;
4397         int ret;
4398         int err = 0;
4399
4400         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4401         total = btrfs_item_size_nr(node, slot);
4402
4403 next:
4404         /* update inode ref count */
4405         (*refs)++;
4406         name_len = btrfs_inode_extref_name_len(node, extref);
4407         index = btrfs_inode_extref_index(node, extref);
4408         parent = btrfs_inode_extref_parent(node, extref);
4409         if (name_len <= BTRFS_NAME_LEN) {
4410                 len = name_len;
4411         } else {
4412                 len = BTRFS_NAME_LEN;
4413                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4414                         root->objectid, ref_key->objectid, ref_key->offset);
4415         }
4416         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4417
4418         /* Check root dir ref name */
4419         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4420                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4421                       root->objectid, ref_key->objectid, ref_key->offset,
4422                       namebuf);
4423                 err |= ROOT_DIR_ERROR;
4424         }
4425
4426         /* find related dir_index */
4427         key.objectid = parent;
4428         key.type = BTRFS_DIR_INDEX_KEY;
4429         key.offset = index;
4430         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4431         err |= ret;
4432
4433         /* find related dir_item */
4434         key.objectid = parent;
4435         key.type = BTRFS_DIR_ITEM_KEY;
4436         key.offset = btrfs_name_hash(namebuf, len);
4437         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4438         err |= ret;
4439
4440         len = sizeof(*extref) + name_len;
4441         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4442         cur += len;
4443
4444         if (cur < total)
4445                 goto next;
4446
4447         return err;
4448 }
4449
4450 /*
4451  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4452  * DIR_ITEM/DIR_INDEX match.
4453  *
4454  * @root:       the root of the fs/file tree
4455  * @key:        the key of the INODE_REF/INODE_EXTREF
4456  * @name:       the name in the INODE_REF/INODE_EXTREF
4457  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4458  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4459  * to (u64)-1
4460  * @ext_ref:    the EXTENDED_IREF feature
4461  *
4462  * Return 0 if no error occurred.
4463  * Return >0 for error bitmap
4464  */
4465 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4466                           char *name, int namelen, u64 index,
4467                           unsigned int ext_ref)
4468 {
4469         struct btrfs_path path;
4470         struct btrfs_inode_ref *ref;
4471         struct btrfs_inode_extref *extref;
4472         struct extent_buffer *node;
4473         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4474         u32 total;
4475         u32 cur = 0;
4476         u32 len;
4477         u32 ref_namelen;
4478         u64 ref_index;
4479         u64 parent;
4480         u64 dir_id;
4481         int slot;
4482         int ret;
4483
4484         btrfs_init_path(&path);
4485         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4486         if (ret) {
4487                 ret = INODE_REF_MISSING;
4488                 goto extref;
4489         }
4490
4491         node = path.nodes[0];
4492         slot = path.slots[0];
4493
4494         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4495         total = btrfs_item_size_nr(node, slot);
4496
4497         /* Iterate all entry of INODE_REF */
4498         while (cur < total) {
4499                 ret = INODE_REF_MISSING;
4500
4501                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4502                 ref_index = btrfs_inode_ref_index(node, ref);
4503                 if (index != (u64)-1 && index != ref_index)
4504                         goto next_ref;
4505
4506                 if (cur + sizeof(*ref) + ref_namelen > total ||
4507                     ref_namelen > BTRFS_NAME_LEN) {
4508                         warning("root %llu INODE %s[%llu %llu] name too long",
4509                                 root->objectid,
4510                                 key->type == BTRFS_INODE_REF_KEY ?
4511                                         "REF" : "EXTREF",
4512                                 key->objectid, key->offset);
4513
4514                         if (cur + sizeof(*ref) > total)
4515                                 break;
4516                         len = min_t(u32, total - cur - sizeof(*ref),
4517                                     BTRFS_NAME_LEN);
4518                 } else {
4519                         len = ref_namelen;
4520                 }
4521
4522                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4523                                    len);
4524
4525                 if (len != namelen || strncmp(ref_namebuf, name, len))
4526                         goto next_ref;
4527
4528                 ret = 0;
4529                 goto out;
4530 next_ref:
4531                 len = sizeof(*ref) + ref_namelen;
4532                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4533                 cur += len;
4534         }
4535
4536 extref:
4537         /* Skip if not support EXTENDED_IREF feature */
4538         if (!ext_ref)
4539                 goto out;
4540
4541         btrfs_release_path(&path);
4542         btrfs_init_path(&path);
4543
4544         dir_id = key->offset;
4545         key->type = BTRFS_INODE_EXTREF_KEY;
4546         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4547
4548         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4549         if (ret) {
4550                 ret = INODE_REF_MISSING;
4551                 goto out;
4552         }
4553
4554         node = path.nodes[0];
4555         slot = path.slots[0];
4556
4557         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4558         cur = 0;
4559         total = btrfs_item_size_nr(node, slot);
4560
4561         /* Iterate all entry of INODE_EXTREF */
4562         while (cur < total) {
4563                 ret = INODE_REF_MISSING;
4564
4565                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4566                 ref_index = btrfs_inode_extref_index(node, extref);
4567                 parent = btrfs_inode_extref_parent(node, extref);
4568                 if (index != (u64)-1 && index != ref_index)
4569                         goto next_extref;
4570
4571                 if (parent != dir_id)
4572                         goto next_extref;
4573
4574                 if (ref_namelen <= BTRFS_NAME_LEN) {
4575                         len = ref_namelen;
4576                 } else {
4577                         len = BTRFS_NAME_LEN;
4578                         warning("root %llu INODE %s[%llu %llu] name too long",
4579                                 root->objectid,
4580                                 key->type == BTRFS_INODE_REF_KEY ?
4581                                         "REF" : "EXTREF",
4582                                 key->objectid, key->offset);
4583                 }
4584                 read_extent_buffer(node, ref_namebuf,
4585                                    (unsigned long)(extref + 1), len);
4586
4587                 if (len != namelen || strncmp(ref_namebuf, name, len))
4588                         goto next_extref;
4589
4590                 ret = 0;
4591                 goto out;
4592
4593 next_extref:
4594                 len = sizeof(*extref) + ref_namelen;
4595                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4596                 cur += len;
4597
4598         }
4599 out:
4600         btrfs_release_path(&path);
4601         return ret;
4602 }
4603
4604 /*
4605  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4606  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4607  *
4608  * @root:       the root of the fs/file tree
4609  * @key:        the key of the DIR_ITEM/DIR_INDEX
4610  * @size:       the st_size of the INODE_ITEM
4611  * @ext_ref:    the EXTENDED_IREF feature
4612  *
4613  * Return 0 if no error occurred.
4614  */
4615 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4616                           struct extent_buffer *node, int slot, u64 *size,
4617                           unsigned int ext_ref)
4618 {
4619         struct btrfs_dir_item *di;
4620         struct btrfs_inode_item *ii;
4621         struct btrfs_path path;
4622         struct btrfs_key location;
4623         char namebuf[BTRFS_NAME_LEN] = {0};
4624         u32 total;
4625         u32 cur = 0;
4626         u32 len;
4627         u32 name_len;
4628         u32 data_len;
4629         u8 filetype;
4630         u32 mode;
4631         u64 index;
4632         int ret;
4633         int err = 0;
4634
4635         /*
4636          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4637          * ignore index check.
4638          */
4639         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4640
4641         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4642         total = btrfs_item_size_nr(node, slot);
4643
4644         while (cur < total) {
4645                 data_len = btrfs_dir_data_len(node, di);
4646                 if (data_len)
4647                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4648                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4649                               "DIR_ITEM" : "DIR_INDEX",
4650                               key->objectid, key->offset, data_len);
4651
4652                 name_len = btrfs_dir_name_len(node, di);
4653                 if (cur + sizeof(*di) + name_len > total ||
4654                     name_len > BTRFS_NAME_LEN) {
4655                         warning("root %llu %s[%llu %llu] name too long",
4656                                 root->objectid,
4657                                 key->type == BTRFS_DIR_ITEM_KEY ?
4658                                 "DIR_ITEM" : "DIR_INDEX",
4659                                 key->objectid, key->offset);
4660
4661                         if (cur + sizeof(*di) > total)
4662                                 break;
4663                         len = min_t(u32, total - cur - sizeof(*di),
4664                                     BTRFS_NAME_LEN);
4665                 } else {
4666                         len = name_len;
4667                 }
4668                 (*size) += name_len;
4669
4670                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4671                 filetype = btrfs_dir_type(node, di);
4672
4673                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4674                     key->offset != btrfs_name_hash(namebuf, len)) {
4675                         err |= -EIO;
4676                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4677                                 root->objectid, key->objectid, key->offset,
4678                                 namebuf, len, filetype, key->offset,
4679                                 btrfs_name_hash(namebuf, len));
4680                 }
4681
4682                 btrfs_init_path(&path);
4683                 btrfs_dir_item_key_to_cpu(node, di, &location);
4684
4685                 /* Ignore related ROOT_ITEM check */
4686                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4687                         goto next;
4688
4689                 /* Check relative INODE_ITEM(existence/filetype) */
4690                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4691                 if (ret) {
4692                         err |= INODE_ITEM_MISSING;
4693                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4694                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4695                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4696                               key->offset, location.objectid, name_len,
4697                               namebuf, filetype);
4698                         goto next;
4699                 }
4700
4701                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4702                                     struct btrfs_inode_item);
4703                 mode = btrfs_inode_mode(path.nodes[0], ii);
4704
4705                 if (imode_to_type(mode) != filetype) {
4706                         err |= INODE_ITEM_MISMATCH;
4707                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4708                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4709                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4710                               key->offset, name_len, namebuf, filetype);
4711                 }
4712
4713                 /* Check relative INODE_REF/INODE_EXTREF */
4714                 location.type = BTRFS_INODE_REF_KEY;
4715                 location.offset = key->objectid;
4716                 ret = find_inode_ref(root, &location, namebuf, len,
4717                                        index, ext_ref);
4718                 err |= ret;
4719                 if (ret & INODE_REF_MISSING)
4720                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4721                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4722                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4723                               key->offset, name_len, namebuf, filetype);
4724
4725 next:
4726                 btrfs_release_path(&path);
4727                 len = sizeof(*di) + name_len + data_len;
4728                 di = (struct btrfs_dir_item *)((char *)di + len);
4729                 cur += len;
4730
4731                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4732                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4733                               root->objectid, key->objectid, key->offset);
4734                         break;
4735                 }
4736         }
4737
4738         return err;
4739 }
4740
4741 /*
4742  * Check file extent datasum/hole, update the size of the file extents,
4743  * check and update the last offset of the file extent.
4744  *
4745  * @root:       the root of fs/file tree.
4746  * @fkey:       the key of the file extent.
4747  * @nodatasum:  INODE_NODATASUM feature.
4748  * @size:       the sum of all EXTENT_DATA items size for this inode.
4749  * @end:        the offset of the last extent.
4750  *
4751  * Return 0 if no error occurred.
4752  */
4753 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4754                              struct extent_buffer *node, int slot,
4755                              unsigned int nodatasum, u64 *size, u64 *end)
4756 {
4757         struct btrfs_file_extent_item *fi;
4758         u64 disk_bytenr;
4759         u64 disk_num_bytes;
4760         u64 extent_num_bytes;
4761         u64 extent_offset;
4762         u64 csum_found;         /* In byte size, sectorsize aligned */
4763         u64 search_start;       /* Logical range start we search for csum */
4764         u64 search_len;         /* Logical range len we search for csum */
4765         unsigned int extent_type;
4766         unsigned int is_hole;
4767         int compressed = 0;
4768         int ret;
4769         int err = 0;
4770
4771         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4772
4773         /* Check inline extent */
4774         extent_type = btrfs_file_extent_type(node, fi);
4775         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4776                 struct btrfs_item *e = btrfs_item_nr(slot);
4777                 u32 item_inline_len;
4778
4779                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4780                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4781                 compressed = btrfs_file_extent_compression(node, fi);
4782                 if (extent_num_bytes == 0) {
4783                         error(
4784                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4785                                 root->objectid, fkey->objectid, fkey->offset);
4786                         err |= FILE_EXTENT_ERROR;
4787                 }
4788                 if (!compressed && extent_num_bytes != item_inline_len) {
4789                         error(
4790                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4791                                 root->objectid, fkey->objectid, fkey->offset,
4792                                 extent_num_bytes, item_inline_len);
4793                         err |= FILE_EXTENT_ERROR;
4794                 }
4795                 *end += extent_num_bytes;
4796                 *size += extent_num_bytes;
4797                 return err;
4798         }
4799
4800         /* Check extent type */
4801         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4802                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4803                 err |= FILE_EXTENT_ERROR;
4804                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4805                       root->objectid, fkey->objectid, fkey->offset);
4806                 return err;
4807         }
4808
4809         /* Check REG_EXTENT/PREALLOC_EXTENT */
4810         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4811         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4812         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4813         extent_offset = btrfs_file_extent_offset(node, fi);
4814         compressed = btrfs_file_extent_compression(node, fi);
4815         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4816
4817         /*
4818          * Check EXTENT_DATA csum
4819          *
4820          * For plain (uncompressed) extent, we should only check the range
4821          * we're referring to, as it's possible that part of prealloc extent
4822          * has been written, and has csum:
4823          *
4824          * |<--- Original large preallocated extent A ---->|
4825          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4826          *      No csum                         Has csum
4827          *
4828          * For compressed extent, we should check the whole range.
4829          */
4830         if (!compressed) {
4831                 search_start = disk_bytenr + extent_offset;
4832                 search_len = extent_num_bytes;
4833         } else {
4834                 search_start = disk_bytenr;
4835                 search_len = disk_num_bytes;
4836         }
4837         ret = count_csum_range(root, search_start, search_len, &csum_found);
4838         if (csum_found > 0 && nodatasum) {
4839                 err |= ODD_CSUM_ITEM;
4840                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4841                       root->objectid, fkey->objectid, fkey->offset);
4842         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4843                    !is_hole && (ret < 0 || csum_found < search_len)) {
4844                 err |= CSUM_ITEM_MISSING;
4845                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4846                       root->objectid, fkey->objectid, fkey->offset,
4847                       csum_found, search_len);
4848         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4849                 err |= ODD_CSUM_ITEM;
4850                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4851                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4852         }
4853
4854         /* Check EXTENT_DATA hole */
4855         if (!no_holes && *end != fkey->offset) {
4856                 err |= FILE_EXTENT_ERROR;
4857                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4858                       root->objectid, fkey->objectid, fkey->offset);
4859         }
4860
4861         *end += extent_num_bytes;
4862         if (!is_hole)
4863                 *size += extent_num_bytes;
4864
4865         return err;
4866 }
4867
4868 /*
4869  * Check INODE_ITEM and related ITEMs (the same inode number)
4870  * 1. check link count
4871  * 2. check inode ref/extref
4872  * 3. check dir item/index
4873  *
4874  * @ext_ref:    the EXTENDED_IREF feature
4875  *
4876  * Return 0 if no error occurred.
4877  * Return >0 for error or hit the traversal is done(by error bitmap)
4878  */
4879 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4880                             unsigned int ext_ref)
4881 {
4882         struct extent_buffer *node;
4883         struct btrfs_inode_item *ii;
4884         struct btrfs_key key;
4885         u64 inode_id;
4886         u32 mode;
4887         u64 nlink;
4888         u64 nbytes;
4889         u64 isize;
4890         u64 size = 0;
4891         u64 refs = 0;
4892         u64 extent_end = 0;
4893         u64 extent_size = 0;
4894         unsigned int dir;
4895         unsigned int nodatasum;
4896         int slot;
4897         int ret;
4898         int err = 0;
4899
4900         node = path->nodes[0];
4901         slot = path->slots[0];
4902
4903         btrfs_item_key_to_cpu(node, &key, slot);
4904         inode_id = key.objectid;
4905
4906         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4907                 ret = btrfs_next_item(root, path);
4908                 if (ret > 0)
4909                         err |= LAST_ITEM;
4910                 return err;
4911         }
4912
4913         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4914         isize = btrfs_inode_size(node, ii);
4915         nbytes = btrfs_inode_nbytes(node, ii);
4916         mode = btrfs_inode_mode(node, ii);
4917         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4918         nlink = btrfs_inode_nlink(node, ii);
4919         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4920
4921         while (1) {
4922                 ret = btrfs_next_item(root, path);
4923                 if (ret < 0) {
4924                         /* out will fill 'err' rusing current statistics */
4925                         goto out;
4926                 } else if (ret > 0) {
4927                         err |= LAST_ITEM;
4928                         goto out;
4929                 }
4930
4931                 node = path->nodes[0];
4932                 slot = path->slots[0];
4933                 btrfs_item_key_to_cpu(node, &key, slot);
4934                 if (key.objectid != inode_id)
4935                         goto out;
4936
4937                 switch (key.type) {
4938                 case BTRFS_INODE_REF_KEY:
4939                         ret = check_inode_ref(root, &key, node, slot, &refs,
4940                                               mode);
4941                         err |= ret;
4942                         break;
4943                 case BTRFS_INODE_EXTREF_KEY:
4944                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4945                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4946                                         root->objectid, key.objectid,
4947                                         key.offset);
4948                         ret = check_inode_extref(root, &key, node, slot, &refs,
4949                                                  mode);
4950                         err |= ret;
4951                         break;
4952                 case BTRFS_DIR_ITEM_KEY:
4953                 case BTRFS_DIR_INDEX_KEY:
4954                         if (!dir) {
4955                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4956                                         root->objectid, inode_id,
4957                                         imode_to_type(mode), key.objectid,
4958                                         key.offset);
4959                         }
4960                         ret = check_dir_item(root, &key, node, slot, &size,
4961                                              ext_ref);
4962                         err |= ret;
4963                         break;
4964                 case BTRFS_EXTENT_DATA_KEY:
4965                         if (dir) {
4966                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4967                                         root->objectid, inode_id, key.objectid,
4968                                         key.offset);
4969                         }
4970                         ret = check_file_extent(root, &key, node, slot,
4971                                                 nodatasum, &extent_size,
4972                                                 &extent_end);
4973                         err |= ret;
4974                         break;
4975                 case BTRFS_XATTR_ITEM_KEY:
4976                         break;
4977                 default:
4978                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4979                               key.objectid, key.type, key.offset);
4980                 }
4981         }
4982
4983 out:
4984         /* verify INODE_ITEM nlink/isize/nbytes */
4985         if (dir) {
4986                 if (nlink != 1) {
4987                         err |= LINK_COUNT_ERROR;
4988                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4989                               root->objectid, inode_id, nlink);
4990                 }
4991
4992                 /*
4993                  * Just a warning, as dir inode nbytes is just an
4994                  * instructive value.
4995                  */
4996                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4997                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4998                                 root->objectid, inode_id,
4999                                 root->fs_info->nodesize);
5000                 }
5001
5002                 if (isize != size) {
5003                         err |= ISIZE_ERROR;
5004                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5005                               root->objectid, inode_id, isize, size);
5006                 }
5007         } else {
5008                 if (nlink != refs) {
5009                         err |= LINK_COUNT_ERROR;
5010                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5011                               root->objectid, inode_id, nlink, refs);
5012                 } else if (!nlink) {
5013                         err |= ORPHAN_ITEM;
5014                 }
5015
5016                 if (!nbytes && !no_holes && extent_end < isize) {
5017                         err |= NBYTES_ERROR;
5018                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5019                               root->objectid, inode_id, isize);
5020                 }
5021
5022                 if (nbytes != extent_size) {
5023                         err |= NBYTES_ERROR;
5024                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5025                               root->objectid, inode_id, nbytes, extent_size);
5026                 }
5027         }
5028
5029         return err;
5030 }
5031
5032 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5033 {
5034         struct btrfs_path path;
5035         struct btrfs_key key;
5036         int err = 0;
5037         int ret;
5038
5039         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5040         key.type = BTRFS_INODE_ITEM_KEY;
5041         key.offset = 0;
5042
5043         /* For root being dropped, we don't need to check first inode */
5044         if (btrfs_root_refs(&root->root_item) == 0 &&
5045             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5046             key.objectid)
5047                 return 0;
5048
5049         btrfs_init_path(&path);
5050
5051         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5052         if (ret < 0)
5053                 goto out;
5054         if (ret > 0) {
5055                 ret = 0;
5056                 err |= INODE_ITEM_MISSING;
5057                 error("first inode item of root %llu is missing",
5058                       root->objectid);
5059         }
5060
5061         err |= check_inode_item(root, &path, ext_ref);
5062         err &= ~LAST_ITEM;
5063         if (err && !ret)
5064                 ret = -EIO;
5065 out:
5066         btrfs_release_path(&path);
5067         return ret;
5068 }
5069
5070 /*
5071  * Iterate all item on the tree and call check_inode_item() to check.
5072  *
5073  * @root:       the root of the tree to be checked.
5074  * @ext_ref:    the EXTENDED_IREF feature
5075  *
5076  * Return 0 if no error found.
5077  * Return <0 for error.
5078  */
5079 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5080 {
5081         struct btrfs_path path;
5082         struct node_refs nrefs;
5083         struct btrfs_root_item *root_item = &root->root_item;
5084         int ret;
5085         int level;
5086         int err = 0;
5087
5088         /*
5089          * We need to manually check the first inode item(256)
5090          * As the following traversal function will only start from
5091          * the first inode item in the leaf, if inode item(256) is missing
5092          * we will just skip it forever.
5093          */
5094         ret = check_fs_first_inode(root, ext_ref);
5095         if (ret < 0)
5096                 return ret;
5097
5098         memset(&nrefs, 0, sizeof(nrefs));
5099         level = btrfs_header_level(root->node);
5100         btrfs_init_path(&path);
5101
5102         if (btrfs_root_refs(root_item) > 0 ||
5103             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5104                 path.nodes[level] = root->node;
5105                 path.slots[level] = 0;
5106                 extent_buffer_get(root->node);
5107         } else {
5108                 struct btrfs_key key;
5109
5110                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5111                 level = root_item->drop_level;
5112                 path.lowest_level = level;
5113                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5114                 if (ret < 0)
5115                         goto out;
5116                 ret = 0;
5117         }
5118
5119         while (1) {
5120                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5121                 err |= !!ret;
5122
5123                 /* if ret is negative, walk shall stop */
5124                 if (ret < 0) {
5125                         ret = err;
5126                         break;
5127                 }
5128
5129                 ret = walk_up_tree_v2(root, &path, &level);
5130                 if (ret != 0) {
5131                         /* Normal exit, reset ret to err */
5132                         ret = err;
5133                         break;
5134                 }
5135         }
5136
5137 out:
5138         btrfs_release_path(&path);
5139         return ret;
5140 }
5141
5142 /*
5143  * Find the relative ref for root_ref and root_backref.
5144  *
5145  * @root:       the root of the root tree.
5146  * @ref_key:    the key of the root ref.
5147  *
5148  * Return 0 if no error occurred.
5149  */
5150 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5151                           struct extent_buffer *node, int slot)
5152 {
5153         struct btrfs_path path;
5154         struct btrfs_key key;
5155         struct btrfs_root_ref *ref;
5156         struct btrfs_root_ref *backref;
5157         char ref_name[BTRFS_NAME_LEN] = {0};
5158         char backref_name[BTRFS_NAME_LEN] = {0};
5159         u64 ref_dirid;
5160         u64 ref_seq;
5161         u32 ref_namelen;
5162         u64 backref_dirid;
5163         u64 backref_seq;
5164         u32 backref_namelen;
5165         u32 len;
5166         int ret;
5167         int err = 0;
5168
5169         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5170         ref_dirid = btrfs_root_ref_dirid(node, ref);
5171         ref_seq = btrfs_root_ref_sequence(node, ref);
5172         ref_namelen = btrfs_root_ref_name_len(node, ref);
5173
5174         if (ref_namelen <= BTRFS_NAME_LEN) {
5175                 len = ref_namelen;
5176         } else {
5177                 len = BTRFS_NAME_LEN;
5178                 warning("%s[%llu %llu] ref_name too long",
5179                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5180                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5181                         ref_key->offset);
5182         }
5183         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5184
5185         /* Find relative root_ref */
5186         key.objectid = ref_key->offset;
5187         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5188         key.offset = ref_key->objectid;
5189
5190         btrfs_init_path(&path);
5191         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5192         if (ret) {
5193                 err |= ROOT_REF_MISSING;
5194                 error("%s[%llu %llu] couldn't find relative ref",
5195                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5196                       "ROOT_REF" : "ROOT_BACKREF",
5197                       ref_key->objectid, ref_key->offset);
5198                 goto out;
5199         }
5200
5201         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5202                                  struct btrfs_root_ref);
5203         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5204         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5205         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5206
5207         if (backref_namelen <= BTRFS_NAME_LEN) {
5208                 len = backref_namelen;
5209         } else {
5210                 len = BTRFS_NAME_LEN;
5211                 warning("%s[%llu %llu] ref_name too long",
5212                         key.type == BTRFS_ROOT_REF_KEY ?
5213                         "ROOT_REF" : "ROOT_BACKREF",
5214                         key.objectid, key.offset);
5215         }
5216         read_extent_buffer(path.nodes[0], backref_name,
5217                            (unsigned long)(backref + 1), len);
5218
5219         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5220             ref_namelen != backref_namelen ||
5221             strncmp(ref_name, backref_name, len)) {
5222                 err |= ROOT_REF_MISMATCH;
5223                 error("%s[%llu %llu] mismatch relative ref",
5224                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5225                       "ROOT_REF" : "ROOT_BACKREF",
5226                       ref_key->objectid, ref_key->offset);
5227         }
5228 out:
5229         btrfs_release_path(&path);
5230         return err;
5231 }
5232
5233 /*
5234  * Check all fs/file tree in low_memory mode.
5235  *
5236  * 1. for fs tree root item, call check_fs_root_v2()
5237  * 2. for fs tree root ref/backref, call check_root_ref()
5238  *
5239  * Return 0 if no error occurred.
5240  */
5241 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5242 {
5243         struct btrfs_root *tree_root = fs_info->tree_root;
5244         struct btrfs_root *cur_root = NULL;
5245         struct btrfs_path path;
5246         struct btrfs_key key;
5247         struct extent_buffer *node;
5248         unsigned int ext_ref;
5249         int slot;
5250         int ret;
5251         int err = 0;
5252
5253         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5254
5255         btrfs_init_path(&path);
5256         key.objectid = BTRFS_FS_TREE_OBJECTID;
5257         key.offset = 0;
5258         key.type = BTRFS_ROOT_ITEM_KEY;
5259
5260         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5261         if (ret < 0) {
5262                 err = ret;
5263                 goto out;
5264         } else if (ret > 0) {
5265                 err = -ENOENT;
5266                 goto out;
5267         }
5268
5269         while (1) {
5270                 node = path.nodes[0];
5271                 slot = path.slots[0];
5272                 btrfs_item_key_to_cpu(node, &key, slot);
5273                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5274                         goto out;
5275                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5276                     fs_root_objectid(key.objectid)) {
5277                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5278                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5279                                                                        &key);
5280                         } else {
5281                                 key.offset = (u64)-1;
5282                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5283                         }
5284
5285                         if (IS_ERR(cur_root)) {
5286                                 error("Fail to read fs/subvol tree: %lld",
5287                                       key.objectid);
5288                                 err = -EIO;
5289                                 goto next;
5290                         }
5291
5292                         ret = check_fs_root_v2(cur_root, ext_ref);
5293                         err |= ret;
5294
5295                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5296                                 btrfs_free_fs_root(cur_root);
5297                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5298                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5299                         ret = check_root_ref(tree_root, &key, node, slot);
5300                         err |= ret;
5301                 }
5302 next:
5303                 ret = btrfs_next_item(tree_root, &path);
5304                 if (ret > 0)
5305                         goto out;
5306                 if (ret < 0) {
5307                         err = ret;
5308                         goto out;
5309                 }
5310         }
5311
5312 out:
5313         btrfs_release_path(&path);
5314         return err;
5315 }
5316
5317 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5318 {
5319         struct list_head *cur = rec->backrefs.next;
5320         struct extent_backref *back;
5321         struct tree_backref *tback;
5322         struct data_backref *dback;
5323         u64 found = 0;
5324         int err = 0;
5325
5326         while(cur != &rec->backrefs) {
5327                 back = to_extent_backref(cur);
5328                 cur = cur->next;
5329                 if (!back->found_extent_tree) {
5330                         err = 1;
5331                         if (!print_errs)
5332                                 goto out;
5333                         if (back->is_data) {
5334                                 dback = to_data_backref(back);
5335                                 fprintf(stderr, "Backref %llu %s %llu"
5336                                         " owner %llu offset %llu num_refs %lu"
5337                                         " not found in extent tree\n",
5338                                         (unsigned long long)rec->start,
5339                                         back->full_backref ?
5340                                         "parent" : "root",
5341                                         back->full_backref ?
5342                                         (unsigned long long)dback->parent:
5343                                         (unsigned long long)dback->root,
5344                                         (unsigned long long)dback->owner,
5345                                         (unsigned long long)dback->offset,
5346                                         (unsigned long)dback->num_refs);
5347                         } else {
5348                                 tback = to_tree_backref(back);
5349                                 fprintf(stderr, "Backref %llu parent %llu"
5350                                         " root %llu not found in extent tree\n",
5351                                         (unsigned long long)rec->start,
5352                                         (unsigned long long)tback->parent,
5353                                         (unsigned long long)tback->root);
5354                         }
5355                 }
5356                 if (!back->is_data && !back->found_ref) {
5357                         err = 1;
5358                         if (!print_errs)
5359                                 goto out;
5360                         tback = to_tree_backref(back);
5361                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5362                                 (unsigned long long)rec->start,
5363                                 back->full_backref ? "parent" : "root",
5364                                 back->full_backref ?
5365                                 (unsigned long long)tback->parent :
5366                                 (unsigned long long)tback->root, back);
5367                 }
5368                 if (back->is_data) {
5369                         dback = to_data_backref(back);
5370                         if (dback->found_ref != dback->num_refs) {
5371                                 err = 1;
5372                                 if (!print_errs)
5373                                         goto out;
5374                                 fprintf(stderr, "Incorrect local backref count"
5375                                         " on %llu %s %llu owner %llu"
5376                                         " offset %llu found %u wanted %u back %p\n",
5377                                         (unsigned long long)rec->start,
5378                                         back->full_backref ?
5379                                         "parent" : "root",
5380                                         back->full_backref ?
5381                                         (unsigned long long)dback->parent:
5382                                         (unsigned long long)dback->root,
5383                                         (unsigned long long)dback->owner,
5384                                         (unsigned long long)dback->offset,
5385                                         dback->found_ref, dback->num_refs, back);
5386                         }
5387                         if (dback->disk_bytenr != rec->start) {
5388                                 err = 1;
5389                                 if (!print_errs)
5390                                         goto out;
5391                                 fprintf(stderr, "Backref disk bytenr does not"
5392                                         " match extent record, bytenr=%llu, "
5393                                         "ref bytenr=%llu\n",
5394                                         (unsigned long long)rec->start,
5395                                         (unsigned long long)dback->disk_bytenr);
5396                         }
5397
5398                         if (dback->bytes != rec->nr) {
5399                                 err = 1;
5400                                 if (!print_errs)
5401                                         goto out;
5402                                 fprintf(stderr, "Backref bytes do not match "
5403                                         "extent backref, bytenr=%llu, ref "
5404                                         "bytes=%llu, backref bytes=%llu\n",
5405                                         (unsigned long long)rec->start,
5406                                         (unsigned long long)rec->nr,
5407                                         (unsigned long long)dback->bytes);
5408                         }
5409                 }
5410                 if (!back->is_data) {
5411                         found += 1;
5412                 } else {
5413                         dback = to_data_backref(back);
5414                         found += dback->found_ref;
5415                 }
5416         }
5417         if (found != rec->refs) {
5418                 err = 1;
5419                 if (!print_errs)
5420                         goto out;
5421                 fprintf(stderr, "Incorrect global backref count "
5422                         "on %llu found %llu wanted %llu\n",
5423                         (unsigned long long)rec->start,
5424                         (unsigned long long)found,
5425                         (unsigned long long)rec->refs);
5426         }
5427 out:
5428         return err;
5429 }
5430
5431 static int free_all_extent_backrefs(struct extent_record *rec)
5432 {
5433         struct extent_backref *back;
5434         struct list_head *cur;
5435         while (!list_empty(&rec->backrefs)) {
5436                 cur = rec->backrefs.next;
5437                 back = to_extent_backref(cur);
5438                 list_del(cur);
5439                 free(back);
5440         }
5441         return 0;
5442 }
5443
5444 static void free_extent_record_cache(struct cache_tree *extent_cache)
5445 {
5446         struct cache_extent *cache;
5447         struct extent_record *rec;
5448
5449         while (1) {
5450                 cache = first_cache_extent(extent_cache);
5451                 if (!cache)
5452                         break;
5453                 rec = container_of(cache, struct extent_record, cache);
5454                 remove_cache_extent(extent_cache, cache);
5455                 free_all_extent_backrefs(rec);
5456                 free(rec);
5457         }
5458 }
5459
5460 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5461                                  struct extent_record *rec)
5462 {
5463         if (rec->content_checked && rec->owner_ref_checked &&
5464             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5465             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5466             !rec->bad_full_backref && !rec->crossing_stripes &&
5467             !rec->wrong_chunk_type) {
5468                 remove_cache_extent(extent_cache, &rec->cache);
5469                 free_all_extent_backrefs(rec);
5470                 list_del_init(&rec->list);
5471                 free(rec);
5472         }
5473         return 0;
5474 }
5475
5476 static int check_owner_ref(struct btrfs_root *root,
5477                             struct extent_record *rec,
5478                             struct extent_buffer *buf)
5479 {
5480         struct extent_backref *node;
5481         struct tree_backref *back;
5482         struct btrfs_root *ref_root;
5483         struct btrfs_key key;
5484         struct btrfs_path path;
5485         struct extent_buffer *parent;
5486         int level;
5487         int found = 0;
5488         int ret;
5489
5490         list_for_each_entry(node, &rec->backrefs, list) {
5491                 if (node->is_data)
5492                         continue;
5493                 if (!node->found_ref)
5494                         continue;
5495                 if (node->full_backref)
5496                         continue;
5497                 back = to_tree_backref(node);
5498                 if (btrfs_header_owner(buf) == back->root)
5499                         return 0;
5500         }
5501         BUG_ON(rec->is_root);
5502
5503         /* try to find the block by search corresponding fs tree */
5504         key.objectid = btrfs_header_owner(buf);
5505         key.type = BTRFS_ROOT_ITEM_KEY;
5506         key.offset = (u64)-1;
5507
5508         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5509         if (IS_ERR(ref_root))
5510                 return 1;
5511
5512         level = btrfs_header_level(buf);
5513         if (level == 0)
5514                 btrfs_item_key_to_cpu(buf, &key, 0);
5515         else
5516                 btrfs_node_key_to_cpu(buf, &key, 0);
5517
5518         btrfs_init_path(&path);
5519         path.lowest_level = level + 1;
5520         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5521         if (ret < 0)
5522                 return 0;
5523
5524         parent = path.nodes[level + 1];
5525         if (parent && buf->start == btrfs_node_blockptr(parent,
5526                                                         path.slots[level + 1]))
5527                 found = 1;
5528
5529         btrfs_release_path(&path);
5530         return found ? 0 : 1;
5531 }
5532
5533 static int is_extent_tree_record(struct extent_record *rec)
5534 {
5535         struct list_head *cur = rec->backrefs.next;
5536         struct extent_backref *node;
5537         struct tree_backref *back;
5538         int is_extent = 0;
5539
5540         while(cur != &rec->backrefs) {
5541                 node = to_extent_backref(cur);
5542                 cur = cur->next;
5543                 if (node->is_data)
5544                         return 0;
5545                 back = to_tree_backref(node);
5546                 if (node->full_backref)
5547                         return 0;
5548                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5549                         is_extent = 1;
5550         }
5551         return is_extent;
5552 }
5553
5554
5555 static int record_bad_block_io(struct btrfs_fs_info *info,
5556                                struct cache_tree *extent_cache,
5557                                u64 start, u64 len)
5558 {
5559         struct extent_record *rec;
5560         struct cache_extent *cache;
5561         struct btrfs_key key;
5562
5563         cache = lookup_cache_extent(extent_cache, start, len);
5564         if (!cache)
5565                 return 0;
5566
5567         rec = container_of(cache, struct extent_record, cache);
5568         if (!is_extent_tree_record(rec))
5569                 return 0;
5570
5571         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5572         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5573 }
5574
5575 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5576                        struct extent_buffer *buf, int slot)
5577 {
5578         if (btrfs_header_level(buf)) {
5579                 struct btrfs_key_ptr ptr1, ptr2;
5580
5581                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5582                                    sizeof(struct btrfs_key_ptr));
5583                 read_extent_buffer(buf, &ptr2,
5584                                    btrfs_node_key_ptr_offset(slot + 1),
5585                                    sizeof(struct btrfs_key_ptr));
5586                 write_extent_buffer(buf, &ptr1,
5587                                     btrfs_node_key_ptr_offset(slot + 1),
5588                                     sizeof(struct btrfs_key_ptr));
5589                 write_extent_buffer(buf, &ptr2,
5590                                     btrfs_node_key_ptr_offset(slot),
5591                                     sizeof(struct btrfs_key_ptr));
5592                 if (slot == 0) {
5593                         struct btrfs_disk_key key;
5594                         btrfs_node_key(buf, &key, 0);
5595                         btrfs_fixup_low_keys(root, path, &key,
5596                                              btrfs_header_level(buf) + 1);
5597                 }
5598         } else {
5599                 struct btrfs_item *item1, *item2;
5600                 struct btrfs_key k1, k2;
5601                 char *item1_data, *item2_data;
5602                 u32 item1_offset, item2_offset, item1_size, item2_size;
5603
5604                 item1 = btrfs_item_nr(slot);
5605                 item2 = btrfs_item_nr(slot + 1);
5606                 btrfs_item_key_to_cpu(buf, &k1, slot);
5607                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5608                 item1_offset = btrfs_item_offset(buf, item1);
5609                 item2_offset = btrfs_item_offset(buf, item2);
5610                 item1_size = btrfs_item_size(buf, item1);
5611                 item2_size = btrfs_item_size(buf, item2);
5612
5613                 item1_data = malloc(item1_size);
5614                 if (!item1_data)
5615                         return -ENOMEM;
5616                 item2_data = malloc(item2_size);
5617                 if (!item2_data) {
5618                         free(item1_data);
5619                         return -ENOMEM;
5620                 }
5621
5622                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5623                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5624
5625                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5626                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5627                 free(item1_data);
5628                 free(item2_data);
5629
5630                 btrfs_set_item_offset(buf, item1, item2_offset);
5631                 btrfs_set_item_offset(buf, item2, item1_offset);
5632                 btrfs_set_item_size(buf, item1, item2_size);
5633                 btrfs_set_item_size(buf, item2, item1_size);
5634
5635                 path->slots[0] = slot;
5636                 btrfs_set_item_key_unsafe(root, path, &k2);
5637                 path->slots[0] = slot + 1;
5638                 btrfs_set_item_key_unsafe(root, path, &k1);
5639         }
5640         return 0;
5641 }
5642
5643 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5644 {
5645         struct extent_buffer *buf;
5646         struct btrfs_key k1, k2;
5647         int i;
5648         int level = path->lowest_level;
5649         int ret = -EIO;
5650
5651         buf = path->nodes[level];
5652         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5653                 if (level) {
5654                         btrfs_node_key_to_cpu(buf, &k1, i);
5655                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5656                 } else {
5657                         btrfs_item_key_to_cpu(buf, &k1, i);
5658                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5659                 }
5660                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5661                         continue;
5662                 ret = swap_values(root, path, buf, i);
5663                 if (ret)
5664                         break;
5665                 btrfs_mark_buffer_dirty(buf);
5666                 i = 0;
5667         }
5668         return ret;
5669 }
5670
5671 static int delete_bogus_item(struct btrfs_root *root,
5672                              struct btrfs_path *path,
5673                              struct extent_buffer *buf, int slot)
5674 {
5675         struct btrfs_key key;
5676         int nritems = btrfs_header_nritems(buf);
5677
5678         btrfs_item_key_to_cpu(buf, &key, slot);
5679
5680         /* These are all the keys we can deal with missing. */
5681         if (key.type != BTRFS_DIR_INDEX_KEY &&
5682             key.type != BTRFS_EXTENT_ITEM_KEY &&
5683             key.type != BTRFS_METADATA_ITEM_KEY &&
5684             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5685             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5686                 return -1;
5687
5688         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5689                (unsigned long long)key.objectid, key.type,
5690                (unsigned long long)key.offset, slot, buf->start);
5691         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5692                               btrfs_item_nr_offset(slot + 1),
5693                               sizeof(struct btrfs_item) *
5694                               (nritems - slot - 1));
5695         btrfs_set_header_nritems(buf, nritems - 1);
5696         if (slot == 0) {
5697                 struct btrfs_disk_key disk_key;
5698
5699                 btrfs_item_key(buf, &disk_key, 0);
5700                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5701         }
5702         btrfs_mark_buffer_dirty(buf);
5703         return 0;
5704 }
5705
5706 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5707 {
5708         struct extent_buffer *buf;
5709         int i;
5710         int ret = 0;
5711
5712         /* We should only get this for leaves */
5713         BUG_ON(path->lowest_level);
5714         buf = path->nodes[0];
5715 again:
5716         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5717                 unsigned int shift = 0, offset;
5718
5719                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5720                     BTRFS_LEAF_DATA_SIZE(root)) {
5721                         if (btrfs_item_end_nr(buf, i) >
5722                             BTRFS_LEAF_DATA_SIZE(root)) {
5723                                 ret = delete_bogus_item(root, path, buf, i);
5724                                 if (!ret)
5725                                         goto again;
5726                                 fprintf(stderr, "item is off the end of the "
5727                                         "leaf, can't fix\n");
5728                                 ret = -EIO;
5729                                 break;
5730                         }
5731                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5732                                 btrfs_item_end_nr(buf, i);
5733                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5734                            btrfs_item_offset_nr(buf, i - 1)) {
5735                         if (btrfs_item_end_nr(buf, i) >
5736                             btrfs_item_offset_nr(buf, i - 1)) {
5737                                 ret = delete_bogus_item(root, path, buf, i);
5738                                 if (!ret)
5739                                         goto again;
5740                                 fprintf(stderr, "items overlap, can't fix\n");
5741                                 ret = -EIO;
5742                                 break;
5743                         }
5744                         shift = btrfs_item_offset_nr(buf, i - 1) -
5745                                 btrfs_item_end_nr(buf, i);
5746                 }
5747                 if (!shift)
5748                         continue;
5749
5750                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5751                        i, shift, (unsigned long long)buf->start);
5752                 offset = btrfs_item_offset_nr(buf, i);
5753                 memmove_extent_buffer(buf,
5754                                       btrfs_leaf_data(buf) + offset + shift,
5755                                       btrfs_leaf_data(buf) + offset,
5756                                       btrfs_item_size_nr(buf, i));
5757                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5758                                       offset + shift);
5759                 btrfs_mark_buffer_dirty(buf);
5760         }
5761
5762         /*
5763          * We may have moved things, in which case we want to exit so we don't
5764          * write those changes out.  Once we have proper abort functionality in
5765          * progs this can be changed to something nicer.
5766          */
5767         BUG_ON(ret);
5768         return ret;
5769 }
5770
5771 /*
5772  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5773  * then just return -EIO.
5774  */
5775 static int try_to_fix_bad_block(struct btrfs_root *root,
5776                                 struct extent_buffer *buf,
5777                                 enum btrfs_tree_block_status status)
5778 {
5779         struct btrfs_trans_handle *trans;
5780         struct ulist *roots;
5781         struct ulist_node *node;
5782         struct btrfs_root *search_root;
5783         struct btrfs_path path;
5784         struct ulist_iterator iter;
5785         struct btrfs_key root_key, key;
5786         int ret;
5787
5788         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5789             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5790                 return -EIO;
5791
5792         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5793         if (ret)
5794                 return -EIO;
5795
5796         btrfs_init_path(&path);
5797         ULIST_ITER_INIT(&iter);
5798         while ((node = ulist_next(roots, &iter))) {
5799                 root_key.objectid = node->val;
5800                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5801                 root_key.offset = (u64)-1;
5802
5803                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5804                 if (IS_ERR(root)) {
5805                         ret = -EIO;
5806                         break;
5807                 }
5808
5809
5810                 trans = btrfs_start_transaction(search_root, 0);
5811                 if (IS_ERR(trans)) {
5812                         ret = PTR_ERR(trans);
5813                         break;
5814                 }
5815
5816                 path.lowest_level = btrfs_header_level(buf);
5817                 path.skip_check_block = 1;
5818                 if (path.lowest_level)
5819                         btrfs_node_key_to_cpu(buf, &key, 0);
5820                 else
5821                         btrfs_item_key_to_cpu(buf, &key, 0);
5822                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5823                 if (ret) {
5824                         ret = -EIO;
5825                         btrfs_commit_transaction(trans, search_root);
5826                         break;
5827                 }
5828                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5829                         ret = fix_key_order(search_root, &path);
5830                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5831                         ret = fix_item_offset(search_root, &path);
5832                 if (ret) {
5833                         btrfs_commit_transaction(trans, search_root);
5834                         break;
5835                 }
5836                 btrfs_release_path(&path);
5837                 btrfs_commit_transaction(trans, search_root);
5838         }
5839         ulist_free(roots);
5840         btrfs_release_path(&path);
5841         return ret;
5842 }
5843
5844 static int check_block(struct btrfs_root *root,
5845                        struct cache_tree *extent_cache,
5846                        struct extent_buffer *buf, u64 flags)
5847 {
5848         struct extent_record *rec;
5849         struct cache_extent *cache;
5850         struct btrfs_key key;
5851         enum btrfs_tree_block_status status;
5852         int ret = 0;
5853         int level;
5854
5855         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5856         if (!cache)
5857                 return 1;
5858         rec = container_of(cache, struct extent_record, cache);
5859         rec->generation = btrfs_header_generation(buf);
5860
5861         level = btrfs_header_level(buf);
5862         if (btrfs_header_nritems(buf) > 0) {
5863
5864                 if (level == 0)
5865                         btrfs_item_key_to_cpu(buf, &key, 0);
5866                 else
5867                         btrfs_node_key_to_cpu(buf, &key, 0);
5868
5869                 rec->info_objectid = key.objectid;
5870         }
5871         rec->info_level = level;
5872
5873         if (btrfs_is_leaf(buf))
5874                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5875         else
5876                 status = btrfs_check_node(root, &rec->parent_key, buf);
5877
5878         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5879                 if (repair)
5880                         status = try_to_fix_bad_block(root, buf, status);
5881                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5882                         ret = -EIO;
5883                         fprintf(stderr, "bad block %llu\n",
5884                                 (unsigned long long)buf->start);
5885                 } else {
5886                         /*
5887                          * Signal to callers we need to start the scan over
5888                          * again since we'll have cowed blocks.
5889                          */
5890                         ret = -EAGAIN;
5891                 }
5892         } else {
5893                 rec->content_checked = 1;
5894                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5895                         rec->owner_ref_checked = 1;
5896                 else {
5897                         ret = check_owner_ref(root, rec, buf);
5898                         if (!ret)
5899                                 rec->owner_ref_checked = 1;
5900                 }
5901         }
5902         if (!ret)
5903                 maybe_free_extent_rec(extent_cache, rec);
5904         return ret;
5905 }
5906
5907 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5908                                                 u64 parent, u64 root)
5909 {
5910         struct list_head *cur = rec->backrefs.next;
5911         struct extent_backref *node;
5912         struct tree_backref *back;
5913
5914         while(cur != &rec->backrefs) {
5915                 node = to_extent_backref(cur);
5916                 cur = cur->next;
5917                 if (node->is_data)
5918                         continue;
5919                 back = to_tree_backref(node);
5920                 if (parent > 0) {
5921                         if (!node->full_backref)
5922                                 continue;
5923                         if (parent == back->parent)
5924                                 return back;
5925                 } else {
5926                         if (node->full_backref)
5927                                 continue;
5928                         if (back->root == root)
5929                                 return back;
5930                 }
5931         }
5932         return NULL;
5933 }
5934
5935 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5936                                                 u64 parent, u64 root)
5937 {
5938         struct tree_backref *ref = malloc(sizeof(*ref));
5939
5940         if (!ref)
5941                 return NULL;
5942         memset(&ref->node, 0, sizeof(ref->node));
5943         if (parent > 0) {
5944                 ref->parent = parent;
5945                 ref->node.full_backref = 1;
5946         } else {
5947                 ref->root = root;
5948                 ref->node.full_backref = 0;
5949         }
5950         list_add_tail(&ref->node.list, &rec->backrefs);
5951
5952         return ref;
5953 }
5954
5955 static struct data_backref *find_data_backref(struct extent_record *rec,
5956                                                 u64 parent, u64 root,
5957                                                 u64 owner, u64 offset,
5958                                                 int found_ref,
5959                                                 u64 disk_bytenr, u64 bytes)
5960 {
5961         struct list_head *cur = rec->backrefs.next;
5962         struct extent_backref *node;
5963         struct data_backref *back;
5964
5965         while(cur != &rec->backrefs) {
5966                 node = to_extent_backref(cur);
5967                 cur = cur->next;
5968                 if (!node->is_data)
5969                         continue;
5970                 back = to_data_backref(node);
5971                 if (parent > 0) {
5972                         if (!node->full_backref)
5973                                 continue;
5974                         if (parent == back->parent)
5975                                 return back;
5976                 } else {
5977                         if (node->full_backref)
5978                                 continue;
5979                         if (back->root == root && back->owner == owner &&
5980                             back->offset == offset) {
5981                                 if (found_ref && node->found_ref &&
5982                                     (back->bytes != bytes ||
5983                                     back->disk_bytenr != disk_bytenr))
5984                                         continue;
5985                                 return back;
5986                         }
5987                 }
5988         }
5989         return NULL;
5990 }
5991
5992 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5993                                                 u64 parent, u64 root,
5994                                                 u64 owner, u64 offset,
5995                                                 u64 max_size)
5996 {
5997         struct data_backref *ref = malloc(sizeof(*ref));
5998
5999         if (!ref)
6000                 return NULL;
6001         memset(&ref->node, 0, sizeof(ref->node));
6002         ref->node.is_data = 1;
6003
6004         if (parent > 0) {
6005                 ref->parent = parent;
6006                 ref->owner = 0;
6007                 ref->offset = 0;
6008                 ref->node.full_backref = 1;
6009         } else {
6010                 ref->root = root;
6011                 ref->owner = owner;
6012                 ref->offset = offset;
6013                 ref->node.full_backref = 0;
6014         }
6015         ref->bytes = max_size;
6016         ref->found_ref = 0;
6017         ref->num_refs = 0;
6018         list_add_tail(&ref->node.list, &rec->backrefs);
6019         if (max_size > rec->max_size)
6020                 rec->max_size = max_size;
6021         return ref;
6022 }
6023
6024 /* Check if the type of extent matches with its chunk */
6025 static void check_extent_type(struct extent_record *rec)
6026 {
6027         struct btrfs_block_group_cache *bg_cache;
6028
6029         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6030         if (!bg_cache)
6031                 return;
6032
6033         /* data extent, check chunk directly*/
6034         if (!rec->metadata) {
6035                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6036                         rec->wrong_chunk_type = 1;
6037                 return;
6038         }
6039
6040         /* metadata extent, check the obvious case first */
6041         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6042                                  BTRFS_BLOCK_GROUP_METADATA))) {
6043                 rec->wrong_chunk_type = 1;
6044                 return;
6045         }
6046
6047         /*
6048          * Check SYSTEM extent, as it's also marked as metadata, we can only
6049          * make sure it's a SYSTEM extent by its backref
6050          */
6051         if (!list_empty(&rec->backrefs)) {
6052                 struct extent_backref *node;
6053                 struct tree_backref *tback;
6054                 u64 bg_type;
6055
6056                 node = to_extent_backref(rec->backrefs.next);
6057                 if (node->is_data) {
6058                         /* tree block shouldn't have data backref */
6059                         rec->wrong_chunk_type = 1;
6060                         return;
6061                 }
6062                 tback = container_of(node, struct tree_backref, node);
6063
6064                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6065                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6066                 else
6067                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6068                 if (!(bg_cache->flags & bg_type))
6069                         rec->wrong_chunk_type = 1;
6070         }
6071 }
6072
6073 /*
6074  * Allocate a new extent record, fill default values from @tmpl and insert int
6075  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6076  * the cache, otherwise it fails.
6077  */
6078 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6079                 struct extent_record *tmpl)
6080 {
6081         struct extent_record *rec;
6082         int ret = 0;
6083
6084         BUG_ON(tmpl->max_size == 0);
6085         rec = malloc(sizeof(*rec));
6086         if (!rec)
6087                 return -ENOMEM;
6088         rec->start = tmpl->start;
6089         rec->max_size = tmpl->max_size;
6090         rec->nr = max(tmpl->nr, tmpl->max_size);
6091         rec->found_rec = tmpl->found_rec;
6092         rec->content_checked = tmpl->content_checked;
6093         rec->owner_ref_checked = tmpl->owner_ref_checked;
6094         rec->num_duplicates = 0;
6095         rec->metadata = tmpl->metadata;
6096         rec->flag_block_full_backref = FLAG_UNSET;
6097         rec->bad_full_backref = 0;
6098         rec->crossing_stripes = 0;
6099         rec->wrong_chunk_type = 0;
6100         rec->is_root = tmpl->is_root;
6101         rec->refs = tmpl->refs;
6102         rec->extent_item_refs = tmpl->extent_item_refs;
6103         rec->parent_generation = tmpl->parent_generation;
6104         INIT_LIST_HEAD(&rec->backrefs);
6105         INIT_LIST_HEAD(&rec->dups);
6106         INIT_LIST_HEAD(&rec->list);
6107         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6108         rec->cache.start = tmpl->start;
6109         rec->cache.size = tmpl->nr;
6110         ret = insert_cache_extent(extent_cache, &rec->cache);
6111         if (ret) {
6112                 free(rec);
6113                 return ret;
6114         }
6115         bytes_used += rec->nr;
6116
6117         if (tmpl->metadata)
6118                 rec->crossing_stripes = check_crossing_stripes(global_info,
6119                                 rec->start, global_info->nodesize);
6120         check_extent_type(rec);
6121         return ret;
6122 }
6123
6124 /*
6125  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6126  * some are hints:
6127  * - refs              - if found, increase refs
6128  * - is_root           - if found, set
6129  * - content_checked   - if found, set
6130  * - owner_ref_checked - if found, set
6131  *
6132  * If not found, create a new one, initialize and insert.
6133  */
6134 static int add_extent_rec(struct cache_tree *extent_cache,
6135                 struct extent_record *tmpl)
6136 {
6137         struct extent_record *rec;
6138         struct cache_extent *cache;
6139         int ret = 0;
6140         int dup = 0;
6141
6142         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6143         if (cache) {
6144                 rec = container_of(cache, struct extent_record, cache);
6145                 if (tmpl->refs)
6146                         rec->refs++;
6147                 if (rec->nr == 1)
6148                         rec->nr = max(tmpl->nr, tmpl->max_size);
6149
6150                 /*
6151                  * We need to make sure to reset nr to whatever the extent
6152                  * record says was the real size, this way we can compare it to
6153                  * the backrefs.
6154                  */
6155                 if (tmpl->found_rec) {
6156                         if (tmpl->start != rec->start || rec->found_rec) {
6157                                 struct extent_record *tmp;
6158
6159                                 dup = 1;
6160                                 if (list_empty(&rec->list))
6161                                         list_add_tail(&rec->list,
6162                                                       &duplicate_extents);
6163
6164                                 /*
6165                                  * We have to do this song and dance in case we
6166                                  * find an extent record that falls inside of
6167                                  * our current extent record but does not have
6168                                  * the same objectid.
6169                                  */
6170                                 tmp = malloc(sizeof(*tmp));
6171                                 if (!tmp)
6172                                         return -ENOMEM;
6173                                 tmp->start = tmpl->start;
6174                                 tmp->max_size = tmpl->max_size;
6175                                 tmp->nr = tmpl->nr;
6176                                 tmp->found_rec = 1;
6177                                 tmp->metadata = tmpl->metadata;
6178                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6179                                 INIT_LIST_HEAD(&tmp->list);
6180                                 list_add_tail(&tmp->list, &rec->dups);
6181                                 rec->num_duplicates++;
6182                         } else {
6183                                 rec->nr = tmpl->nr;
6184                                 rec->found_rec = 1;
6185                         }
6186                 }
6187
6188                 if (tmpl->extent_item_refs && !dup) {
6189                         if (rec->extent_item_refs) {
6190                                 fprintf(stderr, "block %llu rec "
6191                                         "extent_item_refs %llu, passed %llu\n",
6192                                         (unsigned long long)tmpl->start,
6193                                         (unsigned long long)
6194                                                         rec->extent_item_refs,
6195                                         (unsigned long long)tmpl->extent_item_refs);
6196                         }
6197                         rec->extent_item_refs = tmpl->extent_item_refs;
6198                 }
6199                 if (tmpl->is_root)
6200                         rec->is_root = 1;
6201                 if (tmpl->content_checked)
6202                         rec->content_checked = 1;
6203                 if (tmpl->owner_ref_checked)
6204                         rec->owner_ref_checked = 1;
6205                 memcpy(&rec->parent_key, &tmpl->parent_key,
6206                                 sizeof(tmpl->parent_key));
6207                 if (tmpl->parent_generation)
6208                         rec->parent_generation = tmpl->parent_generation;
6209                 if (rec->max_size < tmpl->max_size)
6210                         rec->max_size = tmpl->max_size;
6211
6212                 /*
6213                  * A metadata extent can't cross stripe_len boundary, otherwise
6214                  * kernel scrub won't be able to handle it.
6215                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6216                  * it.
6217                  */
6218                 if (tmpl->metadata)
6219                         rec->crossing_stripes = check_crossing_stripes(
6220                                         global_info, rec->start,
6221                                         global_info->nodesize);
6222                 check_extent_type(rec);
6223                 maybe_free_extent_rec(extent_cache, rec);
6224                 return ret;
6225         }
6226
6227         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6228
6229         return ret;
6230 }
6231
6232 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6233                             u64 parent, u64 root, int found_ref)
6234 {
6235         struct extent_record *rec;
6236         struct tree_backref *back;
6237         struct cache_extent *cache;
6238         int ret;
6239
6240         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6241         if (!cache) {
6242                 struct extent_record tmpl;
6243
6244                 memset(&tmpl, 0, sizeof(tmpl));
6245                 tmpl.start = bytenr;
6246                 tmpl.nr = 1;
6247                 tmpl.metadata = 1;
6248                 tmpl.max_size = 1;
6249
6250                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6251                 if (ret)
6252                         return ret;
6253
6254                 /* really a bug in cache_extent implement now */
6255                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6256                 if (!cache)
6257                         return -ENOENT;
6258         }
6259
6260         rec = container_of(cache, struct extent_record, cache);
6261         if (rec->start != bytenr) {
6262                 /*
6263                  * Several cause, from unaligned bytenr to over lapping extents
6264                  */
6265                 return -EEXIST;
6266         }
6267
6268         back = find_tree_backref(rec, parent, root);
6269         if (!back) {
6270                 back = alloc_tree_backref(rec, parent, root);
6271                 if (!back)
6272                         return -ENOMEM;
6273         }
6274
6275         if (found_ref) {
6276                 if (back->node.found_ref) {
6277                         fprintf(stderr, "Extent back ref already exists "
6278                                 "for %llu parent %llu root %llu \n",
6279                                 (unsigned long long)bytenr,
6280                                 (unsigned long long)parent,
6281                                 (unsigned long long)root);
6282                 }
6283                 back->node.found_ref = 1;
6284         } else {
6285                 if (back->node.found_extent_tree) {
6286                         fprintf(stderr, "Extent back ref already exists "
6287                                 "for %llu parent %llu root %llu \n",
6288                                 (unsigned long long)bytenr,
6289                                 (unsigned long long)parent,
6290                                 (unsigned long long)root);
6291                 }
6292                 back->node.found_extent_tree = 1;
6293         }
6294         check_extent_type(rec);
6295         maybe_free_extent_rec(extent_cache, rec);
6296         return 0;
6297 }
6298
6299 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6300                             u64 parent, u64 root, u64 owner, u64 offset,
6301                             u32 num_refs, int found_ref, u64 max_size)
6302 {
6303         struct extent_record *rec;
6304         struct data_backref *back;
6305         struct cache_extent *cache;
6306         int ret;
6307
6308         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6309         if (!cache) {
6310                 struct extent_record tmpl;
6311
6312                 memset(&tmpl, 0, sizeof(tmpl));
6313                 tmpl.start = bytenr;
6314                 tmpl.nr = 1;
6315                 tmpl.max_size = max_size;
6316
6317                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6318                 if (ret)
6319                         return ret;
6320
6321                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6322                 if (!cache)
6323                         abort();
6324         }
6325
6326         rec = container_of(cache, struct extent_record, cache);
6327         if (rec->max_size < max_size)
6328                 rec->max_size = max_size;
6329
6330         /*
6331          * If found_ref is set then max_size is the real size and must match the
6332          * existing refs.  So if we have already found a ref then we need to
6333          * make sure that this ref matches the existing one, otherwise we need
6334          * to add a new backref so we can notice that the backrefs don't match
6335          * and we need to figure out who is telling the truth.  This is to
6336          * account for that awful fsync bug I introduced where we'd end up with
6337          * a btrfs_file_extent_item that would have its length include multiple
6338          * prealloc extents or point inside of a prealloc extent.
6339          */
6340         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6341                                  bytenr, max_size);
6342         if (!back) {
6343                 back = alloc_data_backref(rec, parent, root, owner, offset,
6344                                           max_size);
6345                 BUG_ON(!back);
6346         }
6347
6348         if (found_ref) {
6349                 BUG_ON(num_refs != 1);
6350                 if (back->node.found_ref)
6351                         BUG_ON(back->bytes != max_size);
6352                 back->node.found_ref = 1;
6353                 back->found_ref += 1;
6354                 back->bytes = max_size;
6355                 back->disk_bytenr = bytenr;
6356                 rec->refs += 1;
6357                 rec->content_checked = 1;
6358                 rec->owner_ref_checked = 1;
6359         } else {
6360                 if (back->node.found_extent_tree) {
6361                         fprintf(stderr, "Extent back ref already exists "
6362                                 "for %llu parent %llu root %llu "
6363                                 "owner %llu offset %llu num_refs %lu\n",
6364                                 (unsigned long long)bytenr,
6365                                 (unsigned long long)parent,
6366                                 (unsigned long long)root,
6367                                 (unsigned long long)owner,
6368                                 (unsigned long long)offset,
6369                                 (unsigned long)num_refs);
6370                 }
6371                 back->num_refs = num_refs;
6372                 back->node.found_extent_tree = 1;
6373         }
6374         maybe_free_extent_rec(extent_cache, rec);
6375         return 0;
6376 }
6377
6378 static int add_pending(struct cache_tree *pending,
6379                        struct cache_tree *seen, u64 bytenr, u32 size)
6380 {
6381         int ret;
6382         ret = add_cache_extent(seen, bytenr, size);
6383         if (ret)
6384                 return ret;
6385         add_cache_extent(pending, bytenr, size);
6386         return 0;
6387 }
6388
6389 static int pick_next_pending(struct cache_tree *pending,
6390                         struct cache_tree *reada,
6391                         struct cache_tree *nodes,
6392                         u64 last, struct block_info *bits, int bits_nr,
6393                         int *reada_bits)
6394 {
6395         unsigned long node_start = last;
6396         struct cache_extent *cache;
6397         int ret;
6398
6399         cache = search_cache_extent(reada, 0);
6400         if (cache) {
6401                 bits[0].start = cache->start;
6402                 bits[0].size = cache->size;
6403                 *reada_bits = 1;
6404                 return 1;
6405         }
6406         *reada_bits = 0;
6407         if (node_start > 32768)
6408                 node_start -= 32768;
6409
6410         cache = search_cache_extent(nodes, node_start);
6411         if (!cache)
6412                 cache = search_cache_extent(nodes, 0);
6413
6414         if (!cache) {
6415                  cache = search_cache_extent(pending, 0);
6416                  if (!cache)
6417                          return 0;
6418                  ret = 0;
6419                  do {
6420                          bits[ret].start = cache->start;
6421                          bits[ret].size = cache->size;
6422                          cache = next_cache_extent(cache);
6423                          ret++;
6424                  } while (cache && ret < bits_nr);
6425                  return ret;
6426         }
6427
6428         ret = 0;
6429         do {
6430                 bits[ret].start = cache->start;
6431                 bits[ret].size = cache->size;
6432                 cache = next_cache_extent(cache);
6433                 ret++;
6434         } while (cache && ret < bits_nr);
6435
6436         if (bits_nr - ret > 8) {
6437                 u64 lookup = bits[0].start + bits[0].size;
6438                 struct cache_extent *next;
6439                 next = search_cache_extent(pending, lookup);
6440                 while(next) {
6441                         if (next->start - lookup > 32768)
6442                                 break;
6443                         bits[ret].start = next->start;
6444                         bits[ret].size = next->size;
6445                         lookup = next->start + next->size;
6446                         ret++;
6447                         if (ret == bits_nr)
6448                                 break;
6449                         next = next_cache_extent(next);
6450                         if (!next)
6451                                 break;
6452                 }
6453         }
6454         return ret;
6455 }
6456
6457 static void free_chunk_record(struct cache_extent *cache)
6458 {
6459         struct chunk_record *rec;
6460
6461         rec = container_of(cache, struct chunk_record, cache);
6462         list_del_init(&rec->list);
6463         list_del_init(&rec->dextents);
6464         free(rec);
6465 }
6466
6467 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6468 {
6469         cache_tree_free_extents(chunk_cache, free_chunk_record);
6470 }
6471
6472 static void free_device_record(struct rb_node *node)
6473 {
6474         struct device_record *rec;
6475
6476         rec = container_of(node, struct device_record, node);
6477         free(rec);
6478 }
6479
6480 FREE_RB_BASED_TREE(device_cache, free_device_record);
6481
6482 int insert_block_group_record(struct block_group_tree *tree,
6483                               struct block_group_record *bg_rec)
6484 {
6485         int ret;
6486
6487         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6488         if (ret)
6489                 return ret;
6490
6491         list_add_tail(&bg_rec->list, &tree->block_groups);
6492         return 0;
6493 }
6494
6495 static void free_block_group_record(struct cache_extent *cache)
6496 {
6497         struct block_group_record *rec;
6498
6499         rec = container_of(cache, struct block_group_record, cache);
6500         list_del_init(&rec->list);
6501         free(rec);
6502 }
6503
6504 void free_block_group_tree(struct block_group_tree *tree)
6505 {
6506         cache_tree_free_extents(&tree->tree, free_block_group_record);
6507 }
6508
6509 int insert_device_extent_record(struct device_extent_tree *tree,
6510                                 struct device_extent_record *de_rec)
6511 {
6512         int ret;
6513
6514         /*
6515          * Device extent is a bit different from the other extents, because
6516          * the extents which belong to the different devices may have the
6517          * same start and size, so we need use the special extent cache
6518          * search/insert functions.
6519          */
6520         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6521         if (ret)
6522                 return ret;
6523
6524         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6525         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6526         return 0;
6527 }
6528
6529 static void free_device_extent_record(struct cache_extent *cache)
6530 {
6531         struct device_extent_record *rec;
6532
6533         rec = container_of(cache, struct device_extent_record, cache);
6534         if (!list_empty(&rec->chunk_list))
6535                 list_del_init(&rec->chunk_list);
6536         if (!list_empty(&rec->device_list))
6537                 list_del_init(&rec->device_list);
6538         free(rec);
6539 }
6540
6541 void free_device_extent_tree(struct device_extent_tree *tree)
6542 {
6543         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6544 }
6545
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the item's ref
 * count.  The key's objectid is used as the extent bytenr and its offset as
 * the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6566
6567 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6568                                             struct btrfs_key *key,
6569                                             int slot)
6570 {
6571         struct btrfs_chunk *ptr;
6572         struct chunk_record *rec;
6573         int num_stripes, i;
6574
6575         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6576         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6577
6578         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6579         if (!rec) {
6580                 fprintf(stderr, "memory allocation failed\n");
6581                 exit(-1);
6582         }
6583
6584         INIT_LIST_HEAD(&rec->list);
6585         INIT_LIST_HEAD(&rec->dextents);
6586         rec->bg_rec = NULL;
6587
6588         rec->cache.start = key->offset;
6589         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6590
6591         rec->generation = btrfs_header_generation(leaf);
6592
6593         rec->objectid = key->objectid;
6594         rec->type = key->type;
6595         rec->offset = key->offset;
6596
6597         rec->length = rec->cache.size;
6598         rec->owner = btrfs_chunk_owner(leaf, ptr);
6599         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6600         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6601         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6602         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6603         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6604         rec->num_stripes = num_stripes;
6605         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6606
6607         for (i = 0; i < rec->num_stripes; ++i) {
6608                 rec->stripes[i].devid =
6609                         btrfs_stripe_devid_nr(leaf, ptr, i);
6610                 rec->stripes[i].offset =
6611                         btrfs_stripe_offset_nr(leaf, ptr, i);
6612                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6613                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6614                                 BTRFS_UUID_SIZE);
6615         }
6616
6617         return rec;
6618 }
6619
6620 static int process_chunk_item(struct cache_tree *chunk_cache,
6621                               struct btrfs_key *key, struct extent_buffer *eb,
6622                               int slot)
6623 {
6624         struct chunk_record *rec;
6625         struct btrfs_chunk *chunk;
6626         int ret = 0;
6627
6628         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6629         /*
6630          * Do extra check for this chunk item,
6631          *
6632          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6633          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6634          * and owner<->key_type check.
6635          */
6636         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6637                                       key->offset);
6638         if (ret < 0) {
6639                 error("chunk(%llu, %llu) is not valid, ignore it",
6640                       key->offset, btrfs_chunk_length(eb, chunk));
6641                 return 0;
6642         }
6643         rec = btrfs_new_chunk_record(eb, key, slot);
6644         ret = insert_cache_extent(chunk_cache, &rec->cache);
6645         if (ret) {
6646                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6647                         rec->offset, rec->length);
6648                 free(rec);
6649         }
6650
6651         return ret;
6652 }
6653
6654 static int process_device_item(struct rb_root *dev_cache,
6655                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6656 {
6657         struct btrfs_dev_item *ptr;
6658         struct device_record *rec;
6659         int ret = 0;
6660
6661         ptr = btrfs_item_ptr(eb,
6662                 slot, struct btrfs_dev_item);
6663
6664         rec = malloc(sizeof(*rec));
6665         if (!rec) {
6666                 fprintf(stderr, "memory allocation failed\n");
6667                 return -ENOMEM;
6668         }
6669
6670         rec->devid = key->offset;
6671         rec->generation = btrfs_header_generation(eb);
6672
6673         rec->objectid = key->objectid;
6674         rec->type = key->type;
6675         rec->offset = key->offset;
6676
6677         rec->devid = btrfs_device_id(eb, ptr);
6678         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6679         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6680
6681         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6682         if (ret) {
6683                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6684                 free(rec);
6685         }
6686
6687         return ret;
6688 }
6689
6690 struct block_group_record *
6691 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6692                              int slot)
6693 {
6694         struct btrfs_block_group_item *ptr;
6695         struct block_group_record *rec;
6696
6697         rec = calloc(1, sizeof(*rec));
6698         if (!rec) {
6699                 fprintf(stderr, "memory allocation failed\n");
6700                 exit(-1);
6701         }
6702
6703         rec->cache.start = key->objectid;
6704         rec->cache.size = key->offset;
6705
6706         rec->generation = btrfs_header_generation(leaf);
6707
6708         rec->objectid = key->objectid;
6709         rec->type = key->type;
6710         rec->offset = key->offset;
6711
6712         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6713         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6714
6715         INIT_LIST_HEAD(&rec->list);
6716
6717         return rec;
6718 }
6719
6720 static int process_block_group_item(struct block_group_tree *block_group_cache,
6721                                     struct btrfs_key *key,
6722                                     struct extent_buffer *eb, int slot)
6723 {
6724         struct block_group_record *rec;
6725         int ret = 0;
6726
6727         rec = btrfs_new_block_group_record(eb, key, slot);
6728         ret = insert_block_group_record(block_group_cache, rec);
6729         if (ret) {
6730                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6731                         rec->objectid, rec->offset);
6732                 free(rec);
6733         }
6734
6735         return ret;
6736 }
6737
6738 struct device_extent_record *
6739 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6740                                struct btrfs_key *key, int slot)
6741 {
6742         struct device_extent_record *rec;
6743         struct btrfs_dev_extent *ptr;
6744
6745         rec = calloc(1, sizeof(*rec));
6746         if (!rec) {
6747                 fprintf(stderr, "memory allocation failed\n");
6748                 exit(-1);
6749         }
6750
6751         rec->cache.objectid = key->objectid;
6752         rec->cache.start = key->offset;
6753
6754         rec->generation = btrfs_header_generation(leaf);
6755
6756         rec->objectid = key->objectid;
6757         rec->type = key->type;
6758         rec->offset = key->offset;
6759
6760         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6761         rec->chunk_objecteid =
6762                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6763         rec->chunk_offset =
6764                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6765         rec->length = btrfs_dev_extent_length(leaf, ptr);
6766         rec->cache.size = rec->length;
6767
6768         INIT_LIST_HEAD(&rec->chunk_list);
6769         INIT_LIST_HEAD(&rec->device_list);
6770
6771         return rec;
6772 }
6773
6774 static int
6775 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6776                            struct btrfs_key *key, struct extent_buffer *eb,
6777                            int slot)
6778 {
6779         struct device_extent_record *rec;
6780         int ret;
6781
6782         rec = btrfs_new_device_extent_record(eb, key, slot);
6783         ret = insert_device_extent_record(dev_extent_cache, rec);
6784         if (ret) {
6785                 fprintf(stderr,
6786                         "Device extent[%llu, %llu, %llu] existed.\n",
6787                         rec->objectid, rec->offset, rec->length);
6788                 free(rec);
6789         }
6790
6791         return ret;
6792 }
6793
6794 static int process_extent_item(struct btrfs_root *root,
6795                                struct cache_tree *extent_cache,
6796                                struct extent_buffer *eb, int slot)
6797 {
6798         struct btrfs_extent_item *ei;
6799         struct btrfs_extent_inline_ref *iref;
6800         struct btrfs_extent_data_ref *dref;
6801         struct btrfs_shared_data_ref *sref;
6802         struct btrfs_key key;
6803         struct extent_record tmpl;
6804         unsigned long end;
6805         unsigned long ptr;
6806         int ret;
6807         int type;
6808         u32 item_size = btrfs_item_size_nr(eb, slot);
6809         u64 refs = 0;
6810         u64 offset;
6811         u64 num_bytes;
6812         int metadata = 0;
6813
6814         btrfs_item_key_to_cpu(eb, &key, slot);
6815
6816         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6817                 metadata = 1;
6818                 num_bytes = root->fs_info->nodesize;
6819         } else {
6820                 num_bytes = key.offset;
6821         }
6822
6823         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6824                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6825                       key.objectid, root->fs_info->sectorsize);
6826                 return -EIO;
6827         }
6828         if (item_size < sizeof(*ei)) {
6829 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6830                 struct btrfs_extent_item_v0 *ei0;
6831                 BUG_ON(item_size != sizeof(*ei0));
6832                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6833                 refs = btrfs_extent_refs_v0(eb, ei0);
6834 #else
6835                 BUG();
6836 #endif
6837                 memset(&tmpl, 0, sizeof(tmpl));
6838                 tmpl.start = key.objectid;
6839                 tmpl.nr = num_bytes;
6840                 tmpl.extent_item_refs = refs;
6841                 tmpl.metadata = metadata;
6842                 tmpl.found_rec = 1;
6843                 tmpl.max_size = num_bytes;
6844
6845                 return add_extent_rec(extent_cache, &tmpl);
6846         }
6847
6848         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6849         refs = btrfs_extent_refs(eb, ei);
6850         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6851                 metadata = 1;
6852         else
6853                 metadata = 0;
6854         if (metadata && num_bytes != root->fs_info->nodesize) {
6855                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6856                       num_bytes, root->fs_info->nodesize);
6857                 return -EIO;
6858         }
6859         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6860                 error("ignore invalid data extent, length %llu is not aligned to %u",
6861                       num_bytes, root->fs_info->sectorsize);
6862                 return -EIO;
6863         }
6864
6865         memset(&tmpl, 0, sizeof(tmpl));
6866         tmpl.start = key.objectid;
6867         tmpl.nr = num_bytes;
6868         tmpl.extent_item_refs = refs;
6869         tmpl.metadata = metadata;
6870         tmpl.found_rec = 1;
6871         tmpl.max_size = num_bytes;
6872         add_extent_rec(extent_cache, &tmpl);
6873
6874         ptr = (unsigned long)(ei + 1);
6875         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6876             key.type == BTRFS_EXTENT_ITEM_KEY)
6877                 ptr += sizeof(struct btrfs_tree_block_info);
6878
6879         end = (unsigned long)ei + item_size;
6880         while (ptr < end) {
6881                 iref = (struct btrfs_extent_inline_ref *)ptr;
6882                 type = btrfs_extent_inline_ref_type(eb, iref);
6883                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6884                 switch (type) {
6885                 case BTRFS_TREE_BLOCK_REF_KEY:
6886                         ret = add_tree_backref(extent_cache, key.objectid,
6887                                         0, offset, 0);
6888                         if (ret < 0)
6889                                 error(
6890                         "add_tree_backref failed (extent items tree block): %s",
6891                                       strerror(-ret));
6892                         break;
6893                 case BTRFS_SHARED_BLOCK_REF_KEY:
6894                         ret = add_tree_backref(extent_cache, key.objectid,
6895                                         offset, 0, 0);
6896                         if (ret < 0)
6897                                 error(
6898                         "add_tree_backref failed (extent items shared block): %s",
6899                                       strerror(-ret));
6900                         break;
6901                 case BTRFS_EXTENT_DATA_REF_KEY:
6902                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6903                         add_data_backref(extent_cache, key.objectid, 0,
6904                                         btrfs_extent_data_ref_root(eb, dref),
6905                                         btrfs_extent_data_ref_objectid(eb,
6906                                                                        dref),
6907                                         btrfs_extent_data_ref_offset(eb, dref),
6908                                         btrfs_extent_data_ref_count(eb, dref),
6909                                         0, num_bytes);
6910                         break;
6911                 case BTRFS_SHARED_DATA_REF_KEY:
6912                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6913                         add_data_backref(extent_cache, key.objectid, offset,
6914                                         0, 0, 0,
6915                                         btrfs_shared_data_ref_count(eb, sref),
6916                                         0, num_bytes);
6917                         break;
6918                 default:
6919                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6920                                 key.objectid, key.type, num_bytes);
6921                         goto out;
6922                 }
6923                 ptr += btrfs_extent_inline_ref_size(type);
6924         }
6925         WARN_ON(ptr > end);
6926 out:
6927         return 0;
6928 }
6929
6930 static int check_cache_range(struct btrfs_root *root,
6931                              struct btrfs_block_group_cache *cache,
6932                              u64 offset, u64 bytes)
6933 {
6934         struct btrfs_free_space *entry;
6935         u64 *logical;
6936         u64 bytenr;
6937         int stripe_len;
6938         int i, nr, ret;
6939
6940         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6941                 bytenr = btrfs_sb_offset(i);
6942                 ret = btrfs_rmap_block(root->fs_info,
6943                                        cache->key.objectid, bytenr, 0,
6944                                        &logical, &nr, &stripe_len);
6945                 if (ret)
6946                         return ret;
6947
6948                 while (nr--) {
6949                         if (logical[nr] + stripe_len <= offset)
6950                                 continue;
6951                         if (offset + bytes <= logical[nr])
6952                                 continue;
6953                         if (logical[nr] == offset) {
6954                                 if (stripe_len >= bytes) {
6955                                         free(logical);
6956                                         return 0;
6957                                 }
6958                                 bytes -= stripe_len;
6959                                 offset += stripe_len;
6960                         } else if (logical[nr] < offset) {
6961                                 if (logical[nr] + stripe_len >=
6962                                     offset + bytes) {
6963                                         free(logical);
6964                                         return 0;
6965                                 }
6966                                 bytes = (offset + bytes) -
6967                                         (logical[nr] + stripe_len);
6968                                 offset = logical[nr] + stripe_len;
6969                         } else {
6970                                 /*
6971                                  * Could be tricky, the super may land in the
6972                                  * middle of the area we're checking.  First
6973                                  * check the easiest case, it's at the end.
6974                                  */
6975                                 if (logical[nr] + stripe_len >=
6976                                     bytes + offset) {
6977                                         bytes = logical[nr] - offset;
6978                                         continue;
6979                                 }
6980
6981                                 /* Check the left side */
6982                                 ret = check_cache_range(root, cache,
6983                                                         offset,
6984                                                         logical[nr] - offset);
6985                                 if (ret) {
6986                                         free(logical);
6987                                         return ret;
6988                                 }
6989
6990                                 /* Now we continue with the right side */
6991                                 bytes = (offset + bytes) -
6992                                         (logical[nr] + stripe_len);
6993                                 offset = logical[nr] + stripe_len;
6994                         }
6995                 }
6996
6997                 free(logical);
6998         }
6999
7000         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7001         if (!entry) {
7002                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7003                         offset, offset+bytes);
7004                 return -EINVAL;
7005         }
7006
7007         if (entry->offset != offset) {
7008                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7009                         entry->offset);
7010                 return -EINVAL;
7011         }
7012
7013         if (entry->bytes != bytes) {
7014                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7015                         bytes, entry->bytes, offset);
7016                 return -EINVAL;
7017         }
7018
7019         unlink_free_space(cache->free_space_ctl, entry);
7020         free(entry);
7021         return 0;
7022 }
7023
7024 static int verify_space_cache(struct btrfs_root *root,
7025                               struct btrfs_block_group_cache *cache)
7026 {
7027         struct btrfs_path path;
7028         struct extent_buffer *leaf;
7029         struct btrfs_key key;
7030         u64 last;
7031         int ret = 0;
7032
7033         root = root->fs_info->extent_root;
7034
7035         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7036
7037         btrfs_init_path(&path);
7038         key.objectid = last;
7039         key.offset = 0;
7040         key.type = BTRFS_EXTENT_ITEM_KEY;
7041         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7042         if (ret < 0)
7043                 goto out;
7044         ret = 0;
7045         while (1) {
7046                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7047                         ret = btrfs_next_leaf(root, &path);
7048                         if (ret < 0)
7049                                 goto out;
7050                         if (ret > 0) {
7051                                 ret = 0;
7052                                 break;
7053                         }
7054                 }
7055                 leaf = path.nodes[0];
7056                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7057                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7058                         break;
7059                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7060                     key.type != BTRFS_METADATA_ITEM_KEY) {
7061                         path.slots[0]++;
7062                         continue;
7063                 }
7064
7065                 if (last == key.objectid) {
7066                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7067                                 last = key.objectid + key.offset;
7068                         else
7069                                 last = key.objectid + root->fs_info->nodesize;
7070                         path.slots[0]++;
7071                         continue;
7072                 }
7073
7074                 ret = check_cache_range(root, cache, last,
7075                                         key.objectid - last);
7076                 if (ret)
7077                         break;
7078                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7079                         last = key.objectid + key.offset;
7080                 else
7081                         last = key.objectid + root->fs_info->nodesize;
7082                 path.slots[0]++;
7083         }
7084
7085         if (last < cache->key.objectid + cache->key.offset)
7086                 ret = check_cache_range(root, cache, last,
7087                                         cache->key.objectid +
7088                                         cache->key.offset - last);
7089
7090 out:
7091         btrfs_release_path(&path);
7092
7093         if (!ret &&
7094             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7095                 fprintf(stderr, "There are still entries left in the space "
7096                         "cache\n");
7097                 ret = -EINVAL;
7098         }
7099
7100         return ret;
7101 }
7102
7103 static int check_space_cache(struct btrfs_root *root)
7104 {
7105         struct btrfs_block_group_cache *cache;
7106         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7107         int ret;
7108         int error = 0;
7109
7110         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7111             btrfs_super_generation(root->fs_info->super_copy) !=
7112             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7113                 printf("cache and super generation don't match, space cache "
7114                        "will be invalidated\n");
7115                 return 0;
7116         }
7117
7118         if (ctx.progress_enabled) {
7119                 ctx.tp = TASK_FREE_SPACE;
7120                 task_start(ctx.info);
7121         }
7122
7123         while (1) {
7124                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7125                 if (!cache)
7126                         break;
7127
7128                 start = cache->key.objectid + cache->key.offset;
7129                 if (!cache->free_space_ctl) {
7130                         if (btrfs_init_free_space_ctl(cache,
7131                                                 root->fs_info->sectorsize)) {
7132                                 ret = -ENOMEM;
7133                                 break;
7134                         }
7135                 } else {
7136                         btrfs_remove_free_space_cache(cache);
7137                 }
7138
7139                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7140                         ret = exclude_super_stripes(root, cache);
7141                         if (ret) {
7142                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7143                                         strerror(-ret));
7144                                 error++;
7145                                 continue;
7146                         }
7147                         ret = load_free_space_tree(root->fs_info, cache);
7148                         free_excluded_extents(root, cache);
7149                         if (ret < 0) {
7150                                 fprintf(stderr, "could not load free space tree: %s\n",
7151                                         strerror(-ret));
7152                                 error++;
7153                                 continue;
7154                         }
7155                         error += ret;
7156                 } else {
7157                         ret = load_free_space_cache(root->fs_info, cache);
7158                         if (!ret)
7159                                 continue;
7160                 }
7161
7162                 ret = verify_space_cache(root, cache);
7163                 if (ret) {
7164                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7165                                 cache->key.objectid);
7166                         error++;
7167                 }
7168         }
7169
7170         task_stop(ctx.info);
7171
7172         return error ? -EINVAL : 0;
7173 }
7174
7175 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7176                         u64 num_bytes, unsigned long leaf_offset,
7177                         struct extent_buffer *eb) {
7178
7179         struct btrfs_fs_info *fs_info = root->fs_info;
7180         u64 offset = 0;
7181         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7182         char *data;
7183         unsigned long csum_offset;
7184         u32 csum;
7185         u32 csum_expected;
7186         u64 read_len;
7187         u64 data_checked = 0;
7188         u64 tmp;
7189         int ret = 0;
7190         int mirror;
7191         int num_copies;
7192
7193         if (num_bytes % fs_info->sectorsize)
7194                 return -EINVAL;
7195
7196         data = malloc(num_bytes);
7197         if (!data)
7198                 return -ENOMEM;
7199
7200         while (offset < num_bytes) {
7201                 mirror = 0;
7202 again:
7203                 read_len = num_bytes - offset;
7204                 /* read as much space once a time */
7205                 ret = read_extent_data(fs_info, data + offset,
7206                                 bytenr + offset, &read_len, mirror);
7207                 if (ret)
7208                         goto out;
7209                 data_checked = 0;
7210                 /* verify every 4k data's checksum */
7211                 while (data_checked < read_len) {
7212                         csum = ~(u32)0;
7213                         tmp = offset + data_checked;
7214
7215                         csum = btrfs_csum_data((char *)data + tmp,
7216                                                csum, fs_info->sectorsize);
7217                         btrfs_csum_final(csum, (u8 *)&csum);
7218
7219                         csum_offset = leaf_offset +
7220                                  tmp / fs_info->sectorsize * csum_size;
7221                         read_extent_buffer(eb, (char *)&csum_expected,
7222                                            csum_offset, csum_size);
7223                         /* try another mirror */
7224                         if (csum != csum_expected) {
7225                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7226                                                 mirror, bytenr + tmp,
7227                                                 csum, csum_expected);
7228                                 num_copies = btrfs_num_copies(root->fs_info,
7229                                                 bytenr, num_bytes);
7230                                 if (mirror < num_copies - 1) {
7231                                         mirror += 1;
7232                                         goto again;
7233                                 }
7234                         }
7235                         data_checked += fs_info->sectorsize;
7236                 }
7237                 offset += read_len;
7238         }
7239 out:
7240         free(data);
7241         return ret;
7242 }
7243
7244 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7245                                u64 num_bytes)
7246 {
7247         struct btrfs_path path;
7248         struct extent_buffer *leaf;
7249         struct btrfs_key key;
7250         int ret;
7251
7252         btrfs_init_path(&path);
7253         key.objectid = bytenr;
7254         key.type = BTRFS_EXTENT_ITEM_KEY;
7255         key.offset = (u64)-1;
7256
7257 again:
7258         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7259                                 0, 0);
7260         if (ret < 0) {
7261                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7262                 btrfs_release_path(&path);
7263                 return ret;
7264         } else if (ret) {
7265                 if (path.slots[0] > 0) {
7266                         path.slots[0]--;
7267                 } else {
7268                         ret = btrfs_prev_leaf(root, &path);
7269                         if (ret < 0) {
7270                                 goto out;
7271                         } else if (ret > 0) {
7272                                 ret = 0;
7273                                 goto out;
7274                         }
7275                 }
7276         }
7277
7278         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7279
7280         /*
7281          * Block group items come before extent items if they have the same
7282          * bytenr, so walk back one more just in case.  Dear future traveller,
7283          * first congrats on mastering time travel.  Now if it's not too much
7284          * trouble could you go back to 2006 and tell Chris to make the
7285          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7286          * EXTENT_ITEM_KEY please?
7287          */
7288         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7289                 if (path.slots[0] > 0) {
7290                         path.slots[0]--;
7291                 } else {
7292                         ret = btrfs_prev_leaf(root, &path);
7293                         if (ret < 0) {
7294                                 goto out;
7295                         } else if (ret > 0) {
7296                                 ret = 0;
7297                                 goto out;
7298                         }
7299                 }
7300                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7301         }
7302
7303         while (num_bytes) {
7304                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7305                         ret = btrfs_next_leaf(root, &path);
7306                         if (ret < 0) {
7307                                 fprintf(stderr, "Error going to next leaf "
7308                                         "%d\n", ret);
7309                                 btrfs_release_path(&path);
7310                                 return ret;
7311                         } else if (ret) {
7312                                 break;
7313                         }
7314                 }
7315                 leaf = path.nodes[0];
7316                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7317                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7318                         path.slots[0]++;
7319                         continue;
7320                 }
7321                 if (key.objectid + key.offset < bytenr) {
7322                         path.slots[0]++;
7323                         continue;
7324                 }
7325                 if (key.objectid > bytenr + num_bytes)
7326                         break;
7327
7328                 if (key.objectid == bytenr) {
7329                         if (key.offset >= num_bytes) {
7330                                 num_bytes = 0;
7331                                 break;
7332                         }
7333                         num_bytes -= key.offset;
7334                         bytenr += key.offset;
7335                 } else if (key.objectid < bytenr) {
7336                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7337                                 num_bytes = 0;
7338                                 break;
7339                         }
7340                         num_bytes = (bytenr + num_bytes) -
7341                                 (key.objectid + key.offset);
7342                         bytenr = key.objectid + key.offset;
7343                 } else {
7344                         if (key.objectid + key.offset < bytenr + num_bytes) {
7345                                 u64 new_start = key.objectid + key.offset;
7346                                 u64 new_bytes = bytenr + num_bytes - new_start;
7347
7348                                 /*
7349                                  * Weird case, the extent is in the middle of
7350                                  * our range, we'll have to search one side
7351                                  * and then the other.  Not sure if this happens
7352                                  * in real life, but no harm in coding it up
7353                                  * anyway just in case.
7354                                  */
7355                                 btrfs_release_path(&path);
7356                                 ret = check_extent_exists(root, new_start,
7357                                                           new_bytes);
7358                                 if (ret) {
7359                                         fprintf(stderr, "Right section didn't "
7360                                                 "have a record\n");
7361                                         break;
7362                                 }
7363                                 num_bytes = key.objectid - bytenr;
7364                                 goto again;
7365                         }
7366                         num_bytes = key.objectid - bytenr;
7367                 }
7368                 path.slots[0]++;
7369         }
7370         ret = 0;
7371
7372 out:
7373         if (num_bytes && !ret) {
7374                 fprintf(stderr, "There are no extents for csum range "
7375                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7376                 ret = 1;
7377         }
7378
7379         btrfs_release_path(&path);
7380         return ret;
7381 }
7382
7383 static int check_csums(struct btrfs_root *root)
7384 {
7385         struct btrfs_path path;
7386         struct extent_buffer *leaf;
7387         struct btrfs_key key;
7388         u64 offset = 0, num_bytes = 0;
7389         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7390         int errors = 0;
7391         int ret;
7392         u64 data_len;
7393         unsigned long leaf_offset;
7394
7395         root = root->fs_info->csum_root;
7396         if (!extent_buffer_uptodate(root->node)) {
7397                 fprintf(stderr, "No valid csum tree found\n");
7398                 return -ENOENT;
7399         }
7400
7401         btrfs_init_path(&path);
7402         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7403         key.type = BTRFS_EXTENT_CSUM_KEY;
7404         key.offset = 0;
7405         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7406         if (ret < 0) {
7407                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7408                 btrfs_release_path(&path);
7409                 return ret;
7410         }
7411
7412         if (ret > 0 && path.slots[0])
7413                 path.slots[0]--;
7414         ret = 0;
7415
7416         while (1) {
7417                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7418                         ret = btrfs_next_leaf(root, &path);
7419                         if (ret < 0) {
7420                                 fprintf(stderr, "Error going to next leaf "
7421                                         "%d\n", ret);
7422                                 break;
7423                         }
7424                         if (ret)
7425                                 break;
7426                 }
7427                 leaf = path.nodes[0];
7428
7429                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7430                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7431                         path.slots[0]++;
7432                         continue;
7433                 }
7434
7435                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7436                               csum_size) * root->fs_info->sectorsize;
7437                 if (!check_data_csum)
7438                         goto skip_csum_check;
7439                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7440                 ret = check_extent_csums(root, key.offset, data_len,
7441                                          leaf_offset, leaf);
7442                 if (ret)
7443                         break;
7444 skip_csum_check:
7445                 if (!num_bytes) {
7446                         offset = key.offset;
7447                 } else if (key.offset != offset + num_bytes) {
7448                         ret = check_extent_exists(root, offset, num_bytes);
7449                         if (ret) {
7450                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7451                                         "there is no extent record\n",
7452                                         offset, offset+num_bytes);
7453                                 errors++;
7454                         }
7455                         offset = key.offset;
7456                         num_bytes = 0;
7457                 }
7458                 num_bytes += data_len;
7459                 path.slots[0]++;
7460         }
7461
7462         btrfs_release_path(&path);
7463         return errors;
7464 }
7465
7466 static int is_dropped_key(struct btrfs_key *key,
7467                           struct btrfs_key *drop_key) {
7468         if (key->objectid < drop_key->objectid)
7469                 return 1;
7470         else if (key->objectid == drop_key->objectid) {
7471                 if (key->type < drop_key->type)
7472                         return 1;
7473                 else if (key->type == drop_key->type) {
7474                         if (key->offset < drop_key->offset)
7475                                 return 1;
7476                 }
7477         }
7478         return 0;
7479 }
7480
7481 /*
7482  * Here are the rules for FULL_BACKREF.
7483  *
7484  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7485  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7486  *      FULL_BACKREF set.
7487  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7488  *    if it happened after the relocation occurred since we'll have dropped the
7489  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7490  *    have no real way to know for sure.
7491  *
7492  * We process the blocks one root at a time, and we start from the lowest root
7493  * objectid and go to the highest.  So we can just lookup the owner backref for
7494  * the record and if we don't find it then we know it doesn't exist and we have
7495  * a FULL BACKREF.
7496  *
7497  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7498  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7499  * be set or not and then we can check later once we've gathered all the refs.
7500  */
7501 static int calc_extent_flag(struct cache_tree *extent_cache,
7502                            struct extent_buffer *buf,
7503                            struct root_item_record *ri,
7504                            u64 *flags)
7505 {
7506         struct extent_record *rec;
7507         struct cache_extent *cache;
7508         struct tree_backref *tback;
7509         u64 owner = 0;
7510
7511         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7512         /* we have added this extent before */
7513         if (!cache)
7514                 return -ENOENT;
7515
7516         rec = container_of(cache, struct extent_record, cache);
7517
7518         /*
7519          * Except file/reloc tree, we can not have
7520          * FULL BACKREF MODE
7521          */
7522         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7523                 goto normal;
7524         /*
7525          * root node
7526          */
7527         if (buf->start == ri->bytenr)
7528                 goto normal;
7529
7530         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7531                 goto full_backref;
7532
7533         owner = btrfs_header_owner(buf);
7534         if (owner == ri->objectid)
7535                 goto normal;
7536
7537         tback = find_tree_backref(rec, 0, owner);
7538         if (!tback)
7539                 goto full_backref;
7540 normal:
7541         *flags = 0;
7542         if (rec->flag_block_full_backref != FLAG_UNSET &&
7543             rec->flag_block_full_backref != 0)
7544                 rec->bad_full_backref = 1;
7545         return 0;
7546 full_backref:
7547         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7548         if (rec->flag_block_full_backref != FLAG_UNSET &&
7549             rec->flag_block_full_backref != 1)
7550                 rec->bad_full_backref = 1;
7551         return 0;
7552 }
7553
7554 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7555 {
7556         fprintf(stderr, "Invalid key type(");
7557         print_key_type(stderr, 0, key_type);
7558         fprintf(stderr, ") found in root(");
7559         print_objectid(stderr, rootid, 0);
7560         fprintf(stderr, ")\n");
7561 }
7562
7563 /*
7564  * Check if the key is valid with its extent buffer.
7565  *
7566  * This is a early check in case invalid key exists in a extent buffer
7567  * This is not comprehensive yet, but should prevent wrong key/item passed
7568  * further
7569  */
7570 static int check_type_with_root(u64 rootid, u8 key_type)
7571 {
7572         switch (key_type) {
7573         /* Only valid in chunk tree */
7574         case BTRFS_DEV_ITEM_KEY:
7575         case BTRFS_CHUNK_ITEM_KEY:
7576                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7577                         goto err;
7578                 break;
7579         /* valid in csum and log tree */
7580         case BTRFS_CSUM_TREE_OBJECTID:
7581                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7582                       is_fstree(rootid)))
7583                         goto err;
7584                 break;
7585         case BTRFS_EXTENT_ITEM_KEY:
7586         case BTRFS_METADATA_ITEM_KEY:
7587         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7588                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7589                         goto err;
7590                 break;
7591         case BTRFS_ROOT_ITEM_KEY:
7592                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7593                         goto err;
7594                 break;
7595         case BTRFS_DEV_EXTENT_KEY:
7596                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7597                         goto err;
7598                 break;
7599         }
7600         return 0;
7601 err:
7602         report_mismatch_key_root(key_type, rootid);
7603         return -EINVAL;
7604 }
7605
7606 static int run_next_block(struct btrfs_root *root,
7607                           struct block_info *bits,
7608                           int bits_nr,
7609                           u64 *last,
7610                           struct cache_tree *pending,
7611                           struct cache_tree *seen,
7612                           struct cache_tree *reada,
7613                           struct cache_tree *nodes,
7614                           struct cache_tree *extent_cache,
7615                           struct cache_tree *chunk_cache,
7616                           struct rb_root *dev_cache,
7617                           struct block_group_tree *block_group_cache,
7618                           struct device_extent_tree *dev_extent_cache,
7619                           struct root_item_record *ri)
7620 {
7621         struct btrfs_fs_info *fs_info = root->fs_info;
7622         struct extent_buffer *buf;
7623         struct extent_record *rec = NULL;
7624         u64 bytenr;
7625         u32 size;
7626         u64 parent;
7627         u64 owner;
7628         u64 flags;
7629         u64 ptr;
7630         u64 gen = 0;
7631         int ret = 0;
7632         int i;
7633         int nritems;
7634         struct btrfs_key key;
7635         struct cache_extent *cache;
7636         int reada_bits;
7637
7638         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7639                                     bits_nr, &reada_bits);
7640         if (nritems == 0)
7641                 return 1;
7642
7643         if (!reada_bits) {
7644                 for(i = 0; i < nritems; i++) {
7645                         ret = add_cache_extent(reada, bits[i].start,
7646                                                bits[i].size);
7647                         if (ret == -EEXIST)
7648                                 continue;
7649
7650                         /* fixme, get the parent transid */
7651                         readahead_tree_block(fs_info, bits[i].start, 0);
7652                 }
7653         }
7654         *last = bits[0].start;
7655         bytenr = bits[0].start;
7656         size = bits[0].size;
7657
7658         cache = lookup_cache_extent(pending, bytenr, size);
7659         if (cache) {
7660                 remove_cache_extent(pending, cache);
7661                 free(cache);
7662         }
7663         cache = lookup_cache_extent(reada, bytenr, size);
7664         if (cache) {
7665                 remove_cache_extent(reada, cache);
7666                 free(cache);
7667         }
7668         cache = lookup_cache_extent(nodes, bytenr, size);
7669         if (cache) {
7670                 remove_cache_extent(nodes, cache);
7671                 free(cache);
7672         }
7673         cache = lookup_cache_extent(extent_cache, bytenr, size);
7674         if (cache) {
7675                 rec = container_of(cache, struct extent_record, cache);
7676                 gen = rec->parent_generation;
7677         }
7678
7679         /* fixme, get the real parent transid */
7680         buf = read_tree_block(root->fs_info, bytenr, gen);
7681         if (!extent_buffer_uptodate(buf)) {
7682                 record_bad_block_io(root->fs_info,
7683                                     extent_cache, bytenr, size);
7684                 goto out;
7685         }
7686
7687         nritems = btrfs_header_nritems(buf);
7688
7689         flags = 0;
7690         if (!init_extent_tree) {
7691                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7692                                        btrfs_header_level(buf), 1, NULL,
7693                                        &flags);
7694                 if (ret < 0) {
7695                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7696                         if (ret < 0) {
7697                                 fprintf(stderr, "Couldn't calc extent flags\n");
7698                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7699                         }
7700                 }
7701         } else {
7702                 flags = 0;
7703                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7704                 if (ret < 0) {
7705                         fprintf(stderr, "Couldn't calc extent flags\n");
7706                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7707                 }
7708         }
7709
7710         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7711                 if (ri != NULL &&
7712                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7713                     ri->objectid == btrfs_header_owner(buf)) {
7714                         /*
7715                          * Ok we got to this block from it's original owner and
7716                          * we have FULL_BACKREF set.  Relocation can leave
7717                          * converted blocks over so this is altogether possible,
7718                          * however it's not possible if the generation > the
7719                          * last snapshot, so check for this case.
7720                          */
7721                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7722                             btrfs_header_generation(buf) > ri->last_snapshot) {
7723                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7724                                 rec->bad_full_backref = 1;
7725                         }
7726                 }
7727         } else {
7728                 if (ri != NULL &&
7729                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7730                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7731                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7732                         rec->bad_full_backref = 1;
7733                 }
7734         }
7735
7736         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7737                 rec->flag_block_full_backref = 1;
7738                 parent = bytenr;
7739                 owner = 0;
7740         } else {
7741                 rec->flag_block_full_backref = 0;
7742                 parent = 0;
7743                 owner = btrfs_header_owner(buf);
7744         }
7745
7746         ret = check_block(root, extent_cache, buf, flags);
7747         if (ret)
7748                 goto out;
7749
7750         if (btrfs_is_leaf(buf)) {
7751                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7752                 for (i = 0; i < nritems; i++) {
7753                         struct btrfs_file_extent_item *fi;
7754                         btrfs_item_key_to_cpu(buf, &key, i);
7755                         /*
7756                          * Check key type against the leaf owner.
7757                          * Could filter quite a lot of early error if
7758                          * owner is correct
7759                          */
7760                         if (check_type_with_root(btrfs_header_owner(buf),
7761                                                  key.type)) {
7762                                 fprintf(stderr, "ignoring invalid key\n");
7763                                 continue;
7764                         }
7765                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7766                                 process_extent_item(root, extent_cache, buf,
7767                                                     i);
7768                                 continue;
7769                         }
7770                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7771                                 process_extent_item(root, extent_cache, buf,
7772                                                     i);
7773                                 continue;
7774                         }
7775                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7776                                 total_csum_bytes +=
7777                                         btrfs_item_size_nr(buf, i);
7778                                 continue;
7779                         }
7780                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7781                                 process_chunk_item(chunk_cache, &key, buf, i);
7782                                 continue;
7783                         }
7784                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7785                                 process_device_item(dev_cache, &key, buf, i);
7786                                 continue;
7787                         }
7788                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7789                                 process_block_group_item(block_group_cache,
7790                                         &key, buf, i);
7791                                 continue;
7792                         }
7793                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7794                                 process_device_extent_item(dev_extent_cache,
7795                                         &key, buf, i);
7796                                 continue;
7797
7798                         }
7799                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7800 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7801                                 process_extent_ref_v0(extent_cache, buf, i);
7802 #else
7803                                 BUG();
7804 #endif
7805                                 continue;
7806                         }
7807
7808                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7809                                 ret = add_tree_backref(extent_cache,
7810                                                 key.objectid, 0, key.offset, 0);
7811                                 if (ret < 0)
7812                                         error(
7813                                 "add_tree_backref failed (leaf tree block): %s",
7814                                               strerror(-ret));
7815                                 continue;
7816                         }
7817                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7818                                 ret = add_tree_backref(extent_cache,
7819                                                 key.objectid, key.offset, 0, 0);
7820                                 if (ret < 0)
7821                                         error(
7822                                 "add_tree_backref failed (leaf shared block): %s",
7823                                               strerror(-ret));
7824                                 continue;
7825                         }
7826                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7827                                 struct btrfs_extent_data_ref *ref;
7828                                 ref = btrfs_item_ptr(buf, i,
7829                                                 struct btrfs_extent_data_ref);
7830                                 add_data_backref(extent_cache,
7831                                         key.objectid, 0,
7832                                         btrfs_extent_data_ref_root(buf, ref),
7833                                         btrfs_extent_data_ref_objectid(buf,
7834                                                                        ref),
7835                                         btrfs_extent_data_ref_offset(buf, ref),
7836                                         btrfs_extent_data_ref_count(buf, ref),
7837                                         0, root->fs_info->sectorsize);
7838                                 continue;
7839                         }
7840                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7841                                 struct btrfs_shared_data_ref *ref;
7842                                 ref = btrfs_item_ptr(buf, i,
7843                                                 struct btrfs_shared_data_ref);
7844                                 add_data_backref(extent_cache,
7845                                         key.objectid, key.offset, 0, 0, 0,
7846                                         btrfs_shared_data_ref_count(buf, ref),
7847                                         0, root->fs_info->sectorsize);
7848                                 continue;
7849                         }
7850                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7851                                 struct bad_item *bad;
7852
7853                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7854                                         continue;
7855                                 if (!owner)
7856                                         continue;
7857                                 bad = malloc(sizeof(struct bad_item));
7858                                 if (!bad)
7859                                         continue;
7860                                 INIT_LIST_HEAD(&bad->list);
7861                                 memcpy(&bad->key, &key,
7862                                        sizeof(struct btrfs_key));
7863                                 bad->root_id = owner;
7864                                 list_add_tail(&bad->list, &delete_items);
7865                                 continue;
7866                         }
7867                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7868                                 continue;
7869                         fi = btrfs_item_ptr(buf, i,
7870                                             struct btrfs_file_extent_item);
7871                         if (btrfs_file_extent_type(buf, fi) ==
7872                             BTRFS_FILE_EXTENT_INLINE)
7873                                 continue;
7874                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7875                                 continue;
7876
7877                         data_bytes_allocated +=
7878                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7879                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7880                                 abort();
7881                         }
7882                         data_bytes_referenced +=
7883                                 btrfs_file_extent_num_bytes(buf, fi);
7884                         add_data_backref(extent_cache,
7885                                 btrfs_file_extent_disk_bytenr(buf, fi),
7886                                 parent, owner, key.objectid, key.offset -
7887                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7888                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7889                 }
7890         } else {
7891                 int level;
7892                 struct btrfs_key first_key;
7893
7894                 first_key.objectid = 0;
7895
7896                 if (nritems > 0)
7897                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7898                 level = btrfs_header_level(buf);
7899                 for (i = 0; i < nritems; i++) {
7900                         struct extent_record tmpl;
7901
7902                         ptr = btrfs_node_blockptr(buf, i);
7903                         size = root->fs_info->nodesize;
7904                         btrfs_node_key_to_cpu(buf, &key, i);
7905                         if (ri != NULL) {
7906                                 if ((level == ri->drop_level)
7907                                     && is_dropped_key(&key, &ri->drop_key)) {
7908                                         continue;
7909                                 }
7910                         }
7911
7912                         memset(&tmpl, 0, sizeof(tmpl));
7913                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7914                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7915                         tmpl.start = ptr;
7916                         tmpl.nr = size;
7917                         tmpl.refs = 1;
7918                         tmpl.metadata = 1;
7919                         tmpl.max_size = size;
7920                         ret = add_extent_rec(extent_cache, &tmpl);
7921                         if (ret < 0)
7922                                 goto out;
7923
7924                         ret = add_tree_backref(extent_cache, ptr, parent,
7925                                         owner, 1);
7926                         if (ret < 0) {
7927                                 error(
7928                                 "add_tree_backref failed (non-leaf block): %s",
7929                                       strerror(-ret));
7930                                 continue;
7931                         }
7932
7933                         if (level > 1) {
7934                                 add_pending(nodes, seen, ptr, size);
7935                         } else {
7936                                 add_pending(pending, seen, ptr, size);
7937                         }
7938                 }
7939                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7940                                       nritems) * sizeof(struct btrfs_key_ptr);
7941         }
7942         total_btree_bytes += buf->len;
7943         if (fs_root_objectid(btrfs_header_owner(buf)))
7944                 total_fs_tree_bytes += buf->len;
7945         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7946                 total_extent_tree_bytes += buf->len;
7947 out:
7948         free_extent_buffer(buf);
7949         return ret;
7950 }
7951
7952 static int add_root_to_pending(struct extent_buffer *buf,
7953                                struct cache_tree *extent_cache,
7954                                struct cache_tree *pending,
7955                                struct cache_tree *seen,
7956                                struct cache_tree *nodes,
7957                                u64 objectid)
7958 {
7959         struct extent_record tmpl;
7960         int ret;
7961
7962         if (btrfs_header_level(buf) > 0)
7963                 add_pending(nodes, seen, buf->start, buf->len);
7964         else
7965                 add_pending(pending, seen, buf->start, buf->len);
7966
7967         memset(&tmpl, 0, sizeof(tmpl));
7968         tmpl.start = buf->start;
7969         tmpl.nr = buf->len;
7970         tmpl.is_root = 1;
7971         tmpl.refs = 1;
7972         tmpl.metadata = 1;
7973         tmpl.max_size = buf->len;
7974         add_extent_rec(extent_cache, &tmpl);
7975
7976         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7977             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7978                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7979                                 0, 1);
7980         else
7981                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7982                                 1);
7983         return ret;
7984 }
7985
7986 /* as we fix the tree, we might be deleting blocks that
7987  * we're tracking for repair.  This hook makes sure we
7988  * remove any backrefs for blocks as we are fixing them.
7989  */
7990 static int free_extent_hook(struct btrfs_trans_handle *trans,
7991                             struct btrfs_root *root,
7992                             u64 bytenr, u64 num_bytes, u64 parent,
7993                             u64 root_objectid, u64 owner, u64 offset,
7994                             int refs_to_drop)
7995 {
7996         struct extent_record *rec;
7997         struct cache_extent *cache;
7998         int is_data;
7999         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8000
8001         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8002         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8003         if (!cache)
8004                 return 0;
8005
8006         rec = container_of(cache, struct extent_record, cache);
8007         if (is_data) {
8008                 struct data_backref *back;
8009                 back = find_data_backref(rec, parent, root_objectid, owner,
8010                                          offset, 1, bytenr, num_bytes);
8011                 if (!back)
8012                         goto out;
8013                 if (back->node.found_ref) {
8014                         back->found_ref -= refs_to_drop;
8015                         if (rec->refs)
8016                                 rec->refs -= refs_to_drop;
8017                 }
8018                 if (back->node.found_extent_tree) {
8019                         back->num_refs -= refs_to_drop;
8020                         if (rec->extent_item_refs)
8021                                 rec->extent_item_refs -= refs_to_drop;
8022                 }
8023                 if (back->found_ref == 0)
8024                         back->node.found_ref = 0;
8025                 if (back->num_refs == 0)
8026                         back->node.found_extent_tree = 0;
8027
8028                 if (!back->node.found_extent_tree && back->node.found_ref) {
8029                         list_del(&back->node.list);
8030                         free(back);
8031                 }
8032         } else {
8033                 struct tree_backref *back;
8034                 back = find_tree_backref(rec, parent, root_objectid);
8035                 if (!back)
8036                         goto out;
8037                 if (back->node.found_ref) {
8038                         if (rec->refs)
8039                                 rec->refs--;
8040                         back->node.found_ref = 0;
8041                 }
8042                 if (back->node.found_extent_tree) {
8043                         if (rec->extent_item_refs)
8044                                 rec->extent_item_refs--;
8045                         back->node.found_extent_tree = 0;
8046                 }
8047                 if (!back->node.found_extent_tree && back->node.found_ref) {
8048                         list_del(&back->node.list);
8049                         free(back);
8050                 }
8051         }
8052         maybe_free_extent_rec(extent_cache, rec);
8053 out:
8054         return 0;
8055 }
8056
8057 static int delete_extent_records(struct btrfs_trans_handle *trans,
8058                                  struct btrfs_root *root,
8059                                  struct btrfs_path *path,
8060                                  u64 bytenr)
8061 {
8062         struct btrfs_key key;
8063         struct btrfs_key found_key;
8064         struct extent_buffer *leaf;
8065         int ret;
8066         int slot;
8067
8068
8069         key.objectid = bytenr;
8070         key.type = (u8)-1;
8071         key.offset = (u64)-1;
8072
8073         while(1) {
8074                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8075                                         &key, path, 0, 1);
8076                 if (ret < 0)
8077                         break;
8078
8079                 if (ret > 0) {
8080                         ret = 0;
8081                         if (path->slots[0] == 0)
8082                                 break;
8083                         path->slots[0]--;
8084                 }
8085                 ret = 0;
8086
8087                 leaf = path->nodes[0];
8088                 slot = path->slots[0];
8089
8090                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8091                 if (found_key.objectid != bytenr)
8092                         break;
8093
8094                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8095                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8096                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8097                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8098                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8099                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8100                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8101                         btrfs_release_path(path);
8102                         if (found_key.type == 0) {
8103                                 if (found_key.offset == 0)
8104                                         break;
8105                                 key.offset = found_key.offset - 1;
8106                                 key.type = found_key.type;
8107                         }
8108                         key.type = found_key.type - 1;
8109                         key.offset = (u64)-1;
8110                         continue;
8111                 }
8112
8113                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8114                         found_key.objectid, found_key.type, found_key.offset);
8115
8116                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8117                 if (ret)
8118                         break;
8119                 btrfs_release_path(path);
8120
8121                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8122                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8123                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8124                                 found_key.offset : root->fs_info->nodesize;
8125
8126                         ret = btrfs_update_block_group(trans, root, bytenr,
8127                                                        bytes, 0, 0);
8128                         if (ret)
8129                                 break;
8130                 }
8131         }
8132
8133         btrfs_release_path(path);
8134         return ret;
8135 }
8136
8137 /*
8138  * for a single backref, this will allocate a new extent
8139  * and add the backref to it.
8140  */
8141 static int record_extent(struct btrfs_trans_handle *trans,
8142                          struct btrfs_fs_info *info,
8143                          struct btrfs_path *path,
8144                          struct extent_record *rec,
8145                          struct extent_backref *back,
8146                          int allocated, u64 flags)
8147 {
8148         int ret = 0;
8149         struct btrfs_root *extent_root = info->extent_root;
8150         struct extent_buffer *leaf;
8151         struct btrfs_key ins_key;
8152         struct btrfs_extent_item *ei;
8153         struct data_backref *dback;
8154         struct btrfs_tree_block_info *bi;
8155
8156         if (!back->is_data)
8157                 rec->max_size = max_t(u64, rec->max_size,
8158                                     info->nodesize);
8159
8160         if (!allocated) {
8161                 u32 item_size = sizeof(*ei);
8162
8163                 if (!back->is_data)
8164                         item_size += sizeof(*bi);
8165
8166                 ins_key.objectid = rec->start;
8167                 ins_key.offset = rec->max_size;
8168                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8169
8170                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8171                                         &ins_key, item_size);
8172                 if (ret)
8173                         goto fail;
8174
8175                 leaf = path->nodes[0];
8176                 ei = btrfs_item_ptr(leaf, path->slots[0],
8177                                     struct btrfs_extent_item);
8178
8179                 btrfs_set_extent_refs(leaf, ei, 0);
8180                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8181
8182                 if (back->is_data) {
8183                         btrfs_set_extent_flags(leaf, ei,
8184                                                BTRFS_EXTENT_FLAG_DATA);
8185                 } else {
8186                         struct btrfs_disk_key copy_key;;
8187
8188                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8189                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8190                                              sizeof(*bi));
8191
8192                         btrfs_set_disk_key_objectid(&copy_key,
8193                                                     rec->info_objectid);
8194                         btrfs_set_disk_key_type(&copy_key, 0);
8195                         btrfs_set_disk_key_offset(&copy_key, 0);
8196
8197                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8198                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8199
8200                         btrfs_set_extent_flags(leaf, ei,
8201                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8202                 }
8203
8204                 btrfs_mark_buffer_dirty(leaf);
8205                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8206                                                rec->max_size, 1, 0);
8207                 if (ret)
8208                         goto fail;
8209                 btrfs_release_path(path);
8210         }
8211
8212         if (back->is_data) {
8213                 u64 parent;
8214                 int i;
8215
8216                 dback = to_data_backref(back);
8217                 if (back->full_backref)
8218                         parent = dback->parent;
8219                 else
8220                         parent = 0;
8221
8222                 for (i = 0; i < dback->found_ref; i++) {
8223                         /* if parent != 0, we're doing a full backref
8224                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8225                          * just makes the backref allocator create a data
8226                          * backref
8227                          */
8228                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8229                                                    rec->start, rec->max_size,
8230                                                    parent,
8231                                                    dback->root,
8232                                                    parent ?
8233                                                    BTRFS_FIRST_FREE_OBJECTID :
8234                                                    dback->owner,
8235                                                    dback->offset);
8236                         if (ret)
8237                                 break;
8238                 }
8239                 fprintf(stderr, "adding new data backref"
8240                                 " on %llu %s %llu owner %llu"
8241                                 " offset %llu found %d\n",
8242                                 (unsigned long long)rec->start,
8243                                 back->full_backref ?
8244                                 "parent" : "root",
8245                                 back->full_backref ?
8246                                 (unsigned long long)parent :
8247                                 (unsigned long long)dback->root,
8248                                 (unsigned long long)dback->owner,
8249                                 (unsigned long long)dback->offset,
8250                                 dback->found_ref);
8251         } else {
8252                 u64 parent;
8253                 struct tree_backref *tback;
8254
8255                 tback = to_tree_backref(back);
8256                 if (back->full_backref)
8257                         parent = tback->parent;
8258                 else
8259                         parent = 0;
8260
8261                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8262                                            rec->start, rec->max_size,
8263                                            parent, tback->root, 0, 0);
8264                 fprintf(stderr, "adding new tree backref on "
8265                         "start %llu len %llu parent %llu root %llu\n",
8266                         rec->start, rec->max_size, parent, tback->root);
8267         }
8268 fail:
8269         btrfs_release_path(path);
8270         return ret;
8271 }
8272
8273 static struct extent_entry *find_entry(struct list_head *entries,
8274                                        u64 bytenr, u64 bytes)
8275 {
8276         struct extent_entry *entry = NULL;
8277
8278         list_for_each_entry(entry, entries, list) {
8279                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8280                         return entry;
8281         }
8282
8283         return NULL;
8284 }
8285
8286 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8287 {
8288         struct extent_entry *entry, *best = NULL, *prev = NULL;
8289
8290         list_for_each_entry(entry, entries, list) {
8291                 /*
8292                  * If there are as many broken entries as entries then we know
8293                  * not to trust this particular entry.
8294                  */
8295                 if (entry->broken == entry->count)
8296                         continue;
8297
8298                 /*
8299                  * Special case, when there are only two entries and 'best' is
8300                  * the first one
8301                  */
8302                 if (!prev) {
8303                         best = entry;
8304                         prev = entry;
8305                         continue;
8306                 }
8307
8308                 /*
8309                  * If our current entry == best then we can't be sure our best
8310                  * is really the best, so we need to keep searching.
8311                  */
8312                 if (best && best->count == entry->count) {
8313                         prev = entry;
8314                         best = NULL;
8315                         continue;
8316                 }
8317
8318                 /* Prev == entry, not good enough, have to keep searching */
8319                 if (!prev->broken && prev->count == entry->count)
8320                         continue;
8321
8322                 if (!best)
8323                         best = (prev->count > entry->count) ? prev : entry;
8324                 else if (best->count < entry->count)
8325                         best = entry;
8326                 prev = entry;
8327         }
8328
8329         return best;
8330 }
8331
8332 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8333                       struct data_backref *dback, struct extent_entry *entry)
8334 {
8335         struct btrfs_trans_handle *trans;
8336         struct btrfs_root *root;
8337         struct btrfs_file_extent_item *fi;
8338         struct extent_buffer *leaf;
8339         struct btrfs_key key;
8340         u64 bytenr, bytes;
8341         int ret, err;
8342
8343         key.objectid = dback->root;
8344         key.type = BTRFS_ROOT_ITEM_KEY;
8345         key.offset = (u64)-1;
8346         root = btrfs_read_fs_root(info, &key);
8347         if (IS_ERR(root)) {
8348                 fprintf(stderr, "Couldn't find root for our ref\n");
8349                 return -EINVAL;
8350         }
8351
8352         /*
8353          * The backref points to the original offset of the extent if it was
8354          * split, so we need to search down to the offset we have and then walk
8355          * forward until we find the backref we're looking for.
8356          */
8357         key.objectid = dback->owner;
8358         key.type = BTRFS_EXTENT_DATA_KEY;
8359         key.offset = dback->offset;
8360         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8361         if (ret < 0) {
8362                 fprintf(stderr, "Error looking up ref %d\n", ret);
8363                 return ret;
8364         }
8365
8366         while (1) {
8367                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8368                         ret = btrfs_next_leaf(root, path);
8369                         if (ret) {
8370                                 fprintf(stderr, "Couldn't find our ref, next\n");
8371                                 return -EINVAL;
8372                         }
8373                 }
8374                 leaf = path->nodes[0];
8375                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8376                 if (key.objectid != dback->owner ||
8377                     key.type != BTRFS_EXTENT_DATA_KEY) {
8378                         fprintf(stderr, "Couldn't find our ref, search\n");
8379                         return -EINVAL;
8380                 }
8381                 fi = btrfs_item_ptr(leaf, path->slots[0],
8382                                     struct btrfs_file_extent_item);
8383                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8384                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8385
8386                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8387                         break;
8388                 path->slots[0]++;
8389         }
8390
8391         btrfs_release_path(path);
8392
8393         trans = btrfs_start_transaction(root, 1);
8394         if (IS_ERR(trans))
8395                 return PTR_ERR(trans);
8396
8397         /*
8398          * Ok we have the key of the file extent we want to fix, now we can cow
8399          * down to the thing and fix it.
8400          */
8401         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8402         if (ret < 0) {
8403                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8404                         key.objectid, key.type, key.offset, ret);
8405                 goto out;
8406         }
8407         if (ret > 0) {
8408                 fprintf(stderr, "Well that's odd, we just found this key "
8409                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8410                         key.offset);
8411                 ret = -EINVAL;
8412                 goto out;
8413         }
8414         leaf = path->nodes[0];
8415         fi = btrfs_item_ptr(leaf, path->slots[0],
8416                             struct btrfs_file_extent_item);
8417
8418         if (btrfs_file_extent_compression(leaf, fi) &&
8419             dback->disk_bytenr != entry->bytenr) {
8420                 fprintf(stderr, "Ref doesn't match the record start and is "
8421                         "compressed, please take a btrfs-image of this file "
8422                         "system and send it to a btrfs developer so they can "
8423                         "complete this functionality for bytenr %Lu\n",
8424                         dback->disk_bytenr);
8425                 ret = -EINVAL;
8426                 goto out;
8427         }
8428
8429         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8430                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8431         } else if (dback->disk_bytenr > entry->bytenr) {
8432                 u64 off_diff, offset;
8433
8434                 off_diff = dback->disk_bytenr - entry->bytenr;
8435                 offset = btrfs_file_extent_offset(leaf, fi);
8436                 if (dback->disk_bytenr + offset +
8437                     btrfs_file_extent_num_bytes(leaf, fi) >
8438                     entry->bytenr + entry->bytes) {
8439                         fprintf(stderr, "Ref is past the entry end, please "
8440                                 "take a btrfs-image of this file system and "
8441                                 "send it to a btrfs developer, ref %Lu\n",
8442                                 dback->disk_bytenr);
8443                         ret = -EINVAL;
8444                         goto out;
8445                 }
8446                 offset += off_diff;
8447                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8448                 btrfs_set_file_extent_offset(leaf, fi, offset);
8449         } else if (dback->disk_bytenr < entry->bytenr) {
8450                 u64 offset;
8451
8452                 offset = btrfs_file_extent_offset(leaf, fi);
8453                 if (dback->disk_bytenr + offset < entry->bytenr) {
8454                         fprintf(stderr, "Ref is before the entry start, please"
8455                                 " take a btrfs-image of this file system and "
8456                                 "send it to a btrfs developer, ref %Lu\n",
8457                                 dback->disk_bytenr);
8458                         ret = -EINVAL;
8459                         goto out;
8460                 }
8461
8462                 offset += dback->disk_bytenr;
8463                 offset -= entry->bytenr;
8464                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8465                 btrfs_set_file_extent_offset(leaf, fi, offset);
8466         }
8467
8468         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8469
8470         /*
8471          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8472          * only do this if we aren't using compression, otherwise it's a
8473          * trickier case.
8474          */
8475         if (!btrfs_file_extent_compression(leaf, fi))
8476                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8477         else
8478                 printf("ram bytes may be wrong?\n");
8479         btrfs_mark_buffer_dirty(leaf);
8480 out:
8481         err = btrfs_commit_transaction(trans, root);
8482         btrfs_release_path(path);
8483         return ret ? ret : err;
8484 }
8485
8486 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8487                            struct extent_record *rec)
8488 {
8489         struct extent_backref *back;
8490         struct data_backref *dback;
8491         struct extent_entry *entry, *best = NULL;
8492         LIST_HEAD(entries);
8493         int nr_entries = 0;
8494         int broken_entries = 0;
8495         int ret = 0;
8496         short mismatch = 0;
8497
8498         /*
8499          * Metadata is easy and the backrefs should always agree on bytenr and
8500          * size, if not we've got bigger issues.
8501          */
8502         if (rec->metadata)
8503                 return 0;
8504
8505         list_for_each_entry(back, &rec->backrefs, list) {
8506                 if (back->full_backref || !back->is_data)
8507                         continue;
8508
8509                 dback = to_data_backref(back);
8510
8511                 /*
8512                  * We only pay attention to backrefs that we found a real
8513                  * backref for.
8514                  */
8515                 if (dback->found_ref == 0)
8516                         continue;
8517
8518                 /*
8519                  * For now we only catch when the bytes don't match, not the
8520                  * bytenr.  We can easily do this at the same time, but I want
8521                  * to have a fs image to test on before we just add repair
8522                  * functionality willy-nilly so we know we won't screw up the
8523                  * repair.
8524                  */
8525
8526                 entry = find_entry(&entries, dback->disk_bytenr,
8527                                    dback->bytes);
8528                 if (!entry) {
8529                         entry = malloc(sizeof(struct extent_entry));
8530                         if (!entry) {
8531                                 ret = -ENOMEM;
8532                                 goto out;
8533                         }
8534                         memset(entry, 0, sizeof(*entry));
8535                         entry->bytenr = dback->disk_bytenr;
8536                         entry->bytes = dback->bytes;
8537                         list_add_tail(&entry->list, &entries);
8538                         nr_entries++;
8539                 }
8540
8541                 /*
8542                  * If we only have on entry we may think the entries agree when
8543                  * in reality they don't so we have to do some extra checking.
8544                  */
8545                 if (dback->disk_bytenr != rec->start ||
8546                     dback->bytes != rec->nr || back->broken)
8547                         mismatch = 1;
8548
8549                 if (back->broken) {
8550                         entry->broken++;
8551                         broken_entries++;
8552                 }
8553
8554                 entry->count++;
8555         }
8556
8557         /* Yay all the backrefs agree, carry on good sir */
8558         if (nr_entries <= 1 && !mismatch)
8559                 goto out;
8560
8561         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8562                 "%Lu\n", rec->start);
8563
8564         /*
8565          * First we want to see if the backrefs can agree amongst themselves who
8566          * is right, so figure out which one of the entries has the highest
8567          * count.
8568          */
8569         best = find_most_right_entry(&entries);
8570
8571         /*
8572          * Ok so we may have an even split between what the backrefs think, so
8573          * this is where we use the extent ref to see what it thinks.
8574          */
8575         if (!best) {
8576                 entry = find_entry(&entries, rec->start, rec->nr);
8577                 if (!entry && (!broken_entries || !rec->found_rec)) {
8578                         fprintf(stderr, "Backrefs don't agree with each other "
8579                                 "and extent record doesn't agree with anybody,"
8580                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8581                                 rec->start, rec->nr);
8582                         ret = -EINVAL;
8583                         goto out;
8584                 } else if (!entry) {
8585                         /*
8586                          * Ok our backrefs were broken, we'll assume this is the
8587                          * correct value and add an entry for this range.
8588                          */
8589                         entry = malloc(sizeof(struct extent_entry));
8590                         if (!entry) {
8591                                 ret = -ENOMEM;
8592                                 goto out;
8593                         }
8594                         memset(entry, 0, sizeof(*entry));
8595                         entry->bytenr = rec->start;
8596                         entry->bytes = rec->nr;
8597                         list_add_tail(&entry->list, &entries);
8598                         nr_entries++;
8599                 }
8600                 entry->count++;
8601                 best = find_most_right_entry(&entries);
8602                 if (!best) {
8603                         fprintf(stderr, "Backrefs and extent record evenly "
8604                                 "split on who is right, this is going to "
8605                                 "require user input to fix bytenr %Lu bytes "
8606                                 "%Lu\n", rec->start, rec->nr);
8607                         ret = -EINVAL;
8608                         goto out;
8609                 }
8610         }
8611
8612         /*
8613          * I don't think this can happen currently as we'll abort() if we catch
8614          * this case higher up, but in case somebody removes that we still can't
8615          * deal with it properly here yet, so just bail out of that's the case.
8616          */
8617         if (best->bytenr != rec->start) {
8618                 fprintf(stderr, "Extent start and backref starts don't match, "
8619                         "please use btrfs-image on this file system and send "
8620                         "it to a btrfs developer so they can make fsck fix "
8621                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8622                         rec->start, rec->nr);
8623                 ret = -EINVAL;
8624                 goto out;
8625         }
8626
8627         /*
8628          * Ok great we all agreed on an extent record, let's go find the real
8629          * references and fix up the ones that don't match.
8630          */
8631         list_for_each_entry(back, &rec->backrefs, list) {
8632                 if (back->full_backref || !back->is_data)
8633                         continue;
8634
8635                 dback = to_data_backref(back);
8636
8637                 /*
8638                  * Still ignoring backrefs that don't have a real ref attached
8639                  * to them.
8640                  */
8641                 if (dback->found_ref == 0)
8642                         continue;
8643
8644                 if (dback->bytes == best->bytes &&
8645                     dback->disk_bytenr == best->bytenr)
8646                         continue;
8647
8648                 ret = repair_ref(info, path, dback, best);
8649                 if (ret)
8650                         goto out;
8651         }
8652
8653         /*
8654          * Ok we messed with the actual refs, which means we need to drop our
8655          * entire cache and go back and rescan.  I know this is a huge pain and
8656          * adds a lot of extra work, but it's the only way to be safe.  Once all
8657          * the backrefs agree we may not need to do anything to the extent
8658          * record itself.
8659          */
8660         ret = -EAGAIN;
8661 out:
8662         while (!list_empty(&entries)) {
8663                 entry = list_entry(entries.next, struct extent_entry, list);
8664                 list_del_init(&entry->list);
8665                 free(entry);
8666         }
8667         return ret;
8668 }
8669
8670 static int process_duplicates(struct cache_tree *extent_cache,
8671                               struct extent_record *rec)
8672 {
8673         struct extent_record *good, *tmp;
8674         struct cache_extent *cache;
8675         int ret;
8676
8677         /*
8678          * If we found a extent record for this extent then return, or if we
8679          * have more than one duplicate we are likely going to need to delete
8680          * something.
8681          */
8682         if (rec->found_rec || rec->num_duplicates > 1)
8683                 return 0;
8684
8685         /* Shouldn't happen but just in case */
8686         BUG_ON(!rec->num_duplicates);
8687
8688         /*
8689          * So this happens if we end up with a backref that doesn't match the
8690          * actual extent entry.  So either the backref is bad or the extent
8691          * entry is bad.  Either way we want to have the extent_record actually
8692          * reflect what we found in the extent_tree, so we need to take the
8693          * duplicate out and use that as the extent_record since the only way we
8694          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8695          */
8696         remove_cache_extent(extent_cache, &rec->cache);
8697
8698         good = to_extent_record(rec->dups.next);
8699         list_del_init(&good->list);
8700         INIT_LIST_HEAD(&good->backrefs);
8701         INIT_LIST_HEAD(&good->dups);
8702         good->cache.start = good->start;
8703         good->cache.size = good->nr;
8704         good->content_checked = 0;
8705         good->owner_ref_checked = 0;
8706         good->num_duplicates = 0;
8707         good->refs = rec->refs;
8708         list_splice_init(&rec->backrefs, &good->backrefs);
8709         while (1) {
8710                 cache = lookup_cache_extent(extent_cache, good->start,
8711                                             good->nr);
8712                 if (!cache)
8713                         break;
8714                 tmp = container_of(cache, struct extent_record, cache);
8715
8716                 /*
8717                  * If we find another overlapping extent and it's found_rec is
8718                  * set then it's a duplicate and we need to try and delete
8719                  * something.
8720                  */
8721                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8722                         if (list_empty(&good->list))
8723                                 list_add_tail(&good->list,
8724                                               &duplicate_extents);
8725                         good->num_duplicates += tmp->num_duplicates + 1;
8726                         list_splice_init(&tmp->dups, &good->dups);
8727                         list_del_init(&tmp->list);
8728                         list_add_tail(&tmp->list, &good->dups);
8729                         remove_cache_extent(extent_cache, &tmp->cache);
8730                         continue;
8731                 }
8732
8733                 /*
8734                  * Ok we have another non extent item backed extent rec, so lets
8735                  * just add it to this extent and carry on like we did above.
8736                  */
8737                 good->refs += tmp->refs;
8738                 list_splice_init(&tmp->backrefs, &good->backrefs);
8739                 remove_cache_extent(extent_cache, &tmp->cache);
8740                 free(tmp);
8741         }
8742         ret = insert_cache_extent(extent_cache, &good->cache);
8743         BUG_ON(ret);
8744         free(rec);
8745         return good->num_duplicates ? 0 : 1;
8746 }
8747
8748 static int delete_duplicate_records(struct btrfs_root *root,
8749                                     struct extent_record *rec)
8750 {
8751         struct btrfs_trans_handle *trans;
8752         LIST_HEAD(delete_list);
8753         struct btrfs_path path;
8754         struct extent_record *tmp, *good, *n;
8755         int nr_del = 0;
8756         int ret = 0, err;
8757         struct btrfs_key key;
8758
8759         btrfs_init_path(&path);
8760
8761         good = rec;
8762         /* Find the record that covers all of the duplicates. */
8763         list_for_each_entry(tmp, &rec->dups, list) {
8764                 if (good->start < tmp->start)
8765                         continue;
8766                 if (good->nr > tmp->nr)
8767                         continue;
8768
8769                 if (tmp->start + tmp->nr < good->start + good->nr) {
8770                         fprintf(stderr, "Ok we have overlapping extents that "
8771                                 "aren't completely covered by each other, this "
8772                                 "is going to require more careful thought.  "
8773                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8774                                 tmp->start, tmp->nr, good->start, good->nr);
8775                         abort();
8776                 }
8777                 good = tmp;
8778         }
8779
8780         if (good != rec)
8781                 list_add_tail(&rec->list, &delete_list);
8782
8783         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8784                 if (tmp == good)
8785                         continue;
8786                 list_move_tail(&tmp->list, &delete_list);
8787         }
8788
8789         root = root->fs_info->extent_root;
8790         trans = btrfs_start_transaction(root, 1);
8791         if (IS_ERR(trans)) {
8792                 ret = PTR_ERR(trans);
8793                 goto out;
8794         }
8795
8796         list_for_each_entry(tmp, &delete_list, list) {
8797                 if (tmp->found_rec == 0)
8798                         continue;
8799                 key.objectid = tmp->start;
8800                 key.type = BTRFS_EXTENT_ITEM_KEY;
8801                 key.offset = tmp->nr;
8802
8803                 /* Shouldn't happen but just in case */
8804                 if (tmp->metadata) {
8805                         fprintf(stderr, "Well this shouldn't happen, extent "
8806                                 "record overlaps but is metadata? "
8807                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8808                         abort();
8809                 }
8810
8811                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8812                 if (ret) {
8813                         if (ret > 0)
8814                                 ret = -EINVAL;
8815                         break;
8816                 }
8817                 ret = btrfs_del_item(trans, root, &path);
8818                 if (ret)
8819                         break;
8820                 btrfs_release_path(&path);
8821                 nr_del++;
8822         }
8823         err = btrfs_commit_transaction(trans, root);
8824         if (err && !ret)
8825                 ret = err;
8826 out:
8827         while (!list_empty(&delete_list)) {
8828                 tmp = to_extent_record(delete_list.next);
8829                 list_del_init(&tmp->list);
8830                 if (tmp == rec)
8831                         continue;
8832                 free(tmp);
8833         }
8834
8835         while (!list_empty(&rec->dups)) {
8836                 tmp = to_extent_record(rec->dups.next);
8837                 list_del_init(&tmp->list);
8838                 free(tmp);
8839         }
8840
8841         btrfs_release_path(&path);
8842
8843         if (!ret && !nr_del)
8844                 rec->num_duplicates = 0;
8845
8846         return ret ? ret : nr_del;
8847 }
8848
8849 static int find_possible_backrefs(struct btrfs_fs_info *info,
8850                                   struct btrfs_path *path,
8851                                   struct cache_tree *extent_cache,
8852                                   struct extent_record *rec)
8853 {
8854         struct btrfs_root *root;
8855         struct extent_backref *back;
8856         struct data_backref *dback;
8857         struct cache_extent *cache;
8858         struct btrfs_file_extent_item *fi;
8859         struct btrfs_key key;
8860         u64 bytenr, bytes;
8861         int ret;
8862
8863         list_for_each_entry(back, &rec->backrefs, list) {
8864                 /* Don't care about full backrefs (poor unloved backrefs) */
8865                 if (back->full_backref || !back->is_data)
8866                         continue;
8867
8868                 dback = to_data_backref(back);
8869
8870                 /* We found this one, we don't need to do a lookup */
8871                 if (dback->found_ref)
8872                         continue;
8873
8874                 key.objectid = dback->root;
8875                 key.type = BTRFS_ROOT_ITEM_KEY;
8876                 key.offset = (u64)-1;
8877
8878                 root = btrfs_read_fs_root(info, &key);
8879
8880                 /* No root, definitely a bad ref, skip */
8881                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8882                         continue;
8883                 /* Other err, exit */
8884                 if (IS_ERR(root))
8885                         return PTR_ERR(root);
8886
8887                 key.objectid = dback->owner;
8888                 key.type = BTRFS_EXTENT_DATA_KEY;
8889                 key.offset = dback->offset;
8890                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8891                 if (ret) {
8892                         btrfs_release_path(path);
8893                         if (ret < 0)
8894                                 return ret;
8895                         /* Didn't find it, we can carry on */
8896                         ret = 0;
8897                         continue;
8898                 }
8899
8900                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8901                                     struct btrfs_file_extent_item);
8902                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8903                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8904                 btrfs_release_path(path);
8905                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8906                 if (cache) {
8907                         struct extent_record *tmp;
8908                         tmp = container_of(cache, struct extent_record, cache);
8909
8910                         /*
8911                          * If we found an extent record for the bytenr for this
8912                          * particular backref then we can't add it to our
8913                          * current extent record.  We only want to add backrefs
8914                          * that don't have a corresponding extent item in the
8915                          * extent tree since they likely belong to this record
8916                          * and we need to fix it if it doesn't match bytenrs.
8917                          */
8918                         if  (tmp->found_rec)
8919                                 continue;
8920                 }
8921
8922                 dback->found_ref += 1;
8923                 dback->disk_bytenr = bytenr;
8924                 dback->bytes = bytes;
8925
8926                 /*
8927                  * Set this so the verify backref code knows not to trust the
8928                  * values in this backref.
8929                  */
8930                 back->broken = 1;
8931         }
8932
8933         return 0;
8934 }
8935
8936 /*
8937  * Record orphan data ref into corresponding root.
8938  *
8939  * Return 0 if the extent item contains data ref and recorded.
8940  * Return 1 if the extent item contains no useful data ref
8941  *   On that case, it may contains only shared_dataref or metadata backref
8942  *   or the file extent exists(this should be handled by the extent bytenr
8943  *   recovery routine)
8944  * Return <0 if something goes wrong.
8945  */
8946 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8947                                       struct extent_record *rec)
8948 {
8949         struct btrfs_key key;
8950         struct btrfs_root *dest_root;
8951         struct extent_backref *back;
8952         struct data_backref *dback;
8953         struct orphan_data_extent *orphan;
8954         struct btrfs_path path;
8955         int recorded_data_ref = 0;
8956         int ret = 0;
8957
8958         if (rec->metadata)
8959                 return 1;
8960         btrfs_init_path(&path);
8961         list_for_each_entry(back, &rec->backrefs, list) {
8962                 if (back->full_backref || !back->is_data ||
8963                     !back->found_extent_tree)
8964                         continue;
8965                 dback = to_data_backref(back);
8966                 if (dback->found_ref)
8967                         continue;
8968                 key.objectid = dback->root;
8969                 key.type = BTRFS_ROOT_ITEM_KEY;
8970                 key.offset = (u64)-1;
8971
8972                 dest_root = btrfs_read_fs_root(fs_info, &key);
8973
8974                 /* For non-exist root we just skip it */
8975                 if (IS_ERR(dest_root) || !dest_root)
8976                         continue;
8977
8978                 key.objectid = dback->owner;
8979                 key.type = BTRFS_EXTENT_DATA_KEY;
8980                 key.offset = dback->offset;
8981
8982                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8983                 btrfs_release_path(&path);
8984                 /*
8985                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8986                  * we need to record it for inode/file extent rebuild.
8987                  * For ret > 0, we record it only for file extent rebuild.
8988                  * For ret == 0, the file extent exists but only bytenr
8989                  * mismatch, let the original bytenr fix routine to handle,
8990                  * don't record it.
8991                  */
8992                 if (ret == 0)
8993                         continue;
8994                 ret = 0;
8995                 orphan = malloc(sizeof(*orphan));
8996                 if (!orphan) {
8997                         ret = -ENOMEM;
8998                         goto out;
8999                 }
9000                 INIT_LIST_HEAD(&orphan->list);
9001                 orphan->root = dback->root;
9002                 orphan->objectid = dback->owner;
9003                 orphan->offset = dback->offset;
9004                 orphan->disk_bytenr = rec->cache.start;
9005                 orphan->disk_len = rec->cache.size;
9006                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9007                 recorded_data_ref = 1;
9008         }
9009 out:
9010         btrfs_release_path(&path);
9011         if (!ret)
9012                 return !recorded_data_ref;
9013         else
9014                 return ret;
9015 }
9016
9017 /*
9018  * when an incorrect extent item is found, this will delete
9019  * all of the existing entries for it and recreate them
9020  * based on what the tree scan found.
9021  */
9022 static int fixup_extent_refs(struct btrfs_fs_info *info,
9023                              struct cache_tree *extent_cache,
9024                              struct extent_record *rec)
9025 {
9026         struct btrfs_trans_handle *trans = NULL;
9027         int ret;
9028         struct btrfs_path path;
9029         struct list_head *cur = rec->backrefs.next;
9030         struct cache_extent *cache;
9031         struct extent_backref *back;
9032         int allocated = 0;
9033         u64 flags = 0;
9034
9035         if (rec->flag_block_full_backref)
9036                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9037
9038         btrfs_init_path(&path);
9039         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9040                 /*
9041                  * Sometimes the backrefs themselves are so broken they don't
9042                  * get attached to any meaningful rec, so first go back and
9043                  * check any of our backrefs that we couldn't find and throw
9044                  * them into the list if we find the backref so that
9045                  * verify_backrefs can figure out what to do.
9046                  */
9047                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9048                 if (ret < 0)
9049                         goto out;
9050         }
9051
9052         /* step one, make sure all of the backrefs agree */
9053         ret = verify_backrefs(info, &path, rec);
9054         if (ret < 0)
9055                 goto out;
9056
9057         trans = btrfs_start_transaction(info->extent_root, 1);
9058         if (IS_ERR(trans)) {
9059                 ret = PTR_ERR(trans);
9060                 goto out;
9061         }
9062
9063         /* step two, delete all the existing records */
9064         ret = delete_extent_records(trans, info->extent_root, &path,
9065                                     rec->start);
9066
9067         if (ret < 0)
9068                 goto out;
9069
9070         /* was this block corrupt?  If so, don't add references to it */
9071         cache = lookup_cache_extent(info->corrupt_blocks,
9072                                     rec->start, rec->max_size);
9073         if (cache) {
9074                 ret = 0;
9075                 goto out;
9076         }
9077
9078         /* step three, recreate all the refs we did find */
9079         while(cur != &rec->backrefs) {
9080                 back = to_extent_backref(cur);
9081                 cur = cur->next;
9082
9083                 /*
9084                  * if we didn't find any references, don't create a
9085                  * new extent record
9086                  */
9087                 if (!back->found_ref)
9088                         continue;
9089
9090                 rec->bad_full_backref = 0;
9091                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9092                 allocated = 1;
9093
9094                 if (ret)
9095                         goto out;
9096         }
9097 out:
9098         if (trans) {
9099                 int err = btrfs_commit_transaction(trans, info->extent_root);
9100                 if (!ret)
9101                         ret = err;
9102         }
9103
9104         if (!ret)
9105                 fprintf(stderr, "Repaired extent references for %llu\n",
9106                                 (unsigned long long)rec->start);
9107
9108         btrfs_release_path(&path);
9109         return ret;
9110 }
9111
9112 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9113                               struct extent_record *rec)
9114 {
9115         struct btrfs_trans_handle *trans;
9116         struct btrfs_root *root = fs_info->extent_root;
9117         struct btrfs_path path;
9118         struct btrfs_extent_item *ei;
9119         struct btrfs_key key;
9120         u64 flags;
9121         int ret = 0;
9122
9123         key.objectid = rec->start;
9124         if (rec->metadata) {
9125                 key.type = BTRFS_METADATA_ITEM_KEY;
9126                 key.offset = rec->info_level;
9127         } else {
9128                 key.type = BTRFS_EXTENT_ITEM_KEY;
9129                 key.offset = rec->max_size;
9130         }
9131
9132         trans = btrfs_start_transaction(root, 0);
9133         if (IS_ERR(trans))
9134                 return PTR_ERR(trans);
9135
9136         btrfs_init_path(&path);
9137         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9138         if (ret < 0) {
9139                 btrfs_release_path(&path);
9140                 btrfs_commit_transaction(trans, root);
9141                 return ret;
9142         } else if (ret) {
9143                 fprintf(stderr, "Didn't find extent for %llu\n",
9144                         (unsigned long long)rec->start);
9145                 btrfs_release_path(&path);
9146                 btrfs_commit_transaction(trans, root);
9147                 return -ENOENT;
9148         }
9149
9150         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9151                             struct btrfs_extent_item);
9152         flags = btrfs_extent_flags(path.nodes[0], ei);
9153         if (rec->flag_block_full_backref) {
9154                 fprintf(stderr, "setting full backref on %llu\n",
9155                         (unsigned long long)key.objectid);
9156                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9157         } else {
9158                 fprintf(stderr, "clearing full backref on %llu\n",
9159                         (unsigned long long)key.objectid);
9160                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9161         }
9162         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9163         btrfs_mark_buffer_dirty(path.nodes[0]);
9164         btrfs_release_path(&path);
9165         ret = btrfs_commit_transaction(trans, root);
9166         if (!ret)
9167                 fprintf(stderr, "Repaired extent flags for %llu\n",
9168                                 (unsigned long long)rec->start);
9169
9170         return ret;
9171 }
9172
9173 /* right now we only prune from the extent allocation tree */
9174 static int prune_one_block(struct btrfs_trans_handle *trans,
9175                            struct btrfs_fs_info *info,
9176                            struct btrfs_corrupt_block *corrupt)
9177 {
9178         int ret;
9179         struct btrfs_path path;
9180         struct extent_buffer *eb;
9181         u64 found;
9182         int slot;
9183         int nritems;
9184         int level = corrupt->level + 1;
9185
9186         btrfs_init_path(&path);
9187 again:
9188         /* we want to stop at the parent to our busted block */
9189         path.lowest_level = level;
9190
9191         ret = btrfs_search_slot(trans, info->extent_root,
9192                                 &corrupt->key, &path, -1, 1);
9193
9194         if (ret < 0)
9195                 goto out;
9196
9197         eb = path.nodes[level];
9198         if (!eb) {
9199                 ret = -ENOENT;
9200                 goto out;
9201         }
9202
9203         /*
9204          * hopefully the search gave us the block we want to prune,
9205          * lets try that first
9206          */
9207         slot = path.slots[level];
9208         found =  btrfs_node_blockptr(eb, slot);
9209         if (found == corrupt->cache.start)
9210                 goto del_ptr;
9211
9212         nritems = btrfs_header_nritems(eb);
9213
9214         /* the search failed, lets scan this node and hope we find it */
9215         for (slot = 0; slot < nritems; slot++) {
9216                 found =  btrfs_node_blockptr(eb, slot);
9217                 if (found == corrupt->cache.start)
9218                         goto del_ptr;
9219         }
9220         /*
9221          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9222          * to this block
9223          */
9224         if (eb == info->extent_root->node) {
9225                 ret = -ENOENT;
9226                 goto out;
9227         } else {
9228                 level++;
9229                 btrfs_release_path(&path);
9230                 goto again;
9231         }
9232
9233 del_ptr:
9234         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9235         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9236
9237 out:
9238         btrfs_release_path(&path);
9239         return ret;
9240 }
9241
9242 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9243 {
9244         struct btrfs_trans_handle *trans = NULL;
9245         struct cache_extent *cache;
9246         struct btrfs_corrupt_block *corrupt;
9247
9248         while (1) {
9249                 cache = search_cache_extent(info->corrupt_blocks, 0);
9250                 if (!cache)
9251                         break;
9252                 if (!trans) {
9253                         trans = btrfs_start_transaction(info->extent_root, 1);
9254                         if (IS_ERR(trans))
9255                                 return PTR_ERR(trans);
9256                 }
9257                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9258                 prune_one_block(trans, info, corrupt);
9259                 remove_cache_extent(info->corrupt_blocks, cache);
9260         }
9261         if (trans)
9262                 return btrfs_commit_transaction(trans, info->extent_root);
9263         return 0;
9264 }
9265
9266 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9267 {
9268         struct btrfs_block_group_cache *cache;
9269         u64 start, end;
9270         int ret;
9271
9272         while (1) {
9273                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9274                                             &start, &end, EXTENT_DIRTY);
9275                 if (ret)
9276                         break;
9277                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9278         }
9279
9280         start = 0;
9281         while (1) {
9282                 cache = btrfs_lookup_first_block_group(fs_info, start);
9283                 if (!cache)
9284                         break;
9285                 if (cache->cached)
9286                         cache->cached = 0;
9287                 start = cache->key.objectid + cache->key.offset;
9288         }
9289 }
9290
9291 static int check_extent_refs(struct btrfs_root *root,
9292                              struct cache_tree *extent_cache)
9293 {
9294         struct extent_record *rec;
9295         struct cache_extent *cache;
9296         int ret = 0;
9297         int had_dups = 0;
9298
9299         if (repair) {
9300                 /*
9301                  * if we're doing a repair, we have to make sure
9302                  * we don't allocate from the problem extents.
9303                  * In the worst case, this will be all the
9304                  * extents in the FS
9305                  */
9306                 cache = search_cache_extent(extent_cache, 0);
9307                 while(cache) {
9308                         rec = container_of(cache, struct extent_record, cache);
9309                         set_extent_dirty(root->fs_info->excluded_extents,
9310                                          rec->start,
9311                                          rec->start + rec->max_size - 1);
9312                         cache = next_cache_extent(cache);
9313                 }
9314
9315                 /* pin down all the corrupted blocks too */
9316                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9317                 while(cache) {
9318                         set_extent_dirty(root->fs_info->excluded_extents,
9319                                          cache->start,
9320                                          cache->start + cache->size - 1);
9321                         cache = next_cache_extent(cache);
9322                 }
9323                 prune_corrupt_blocks(root->fs_info);
9324                 reset_cached_block_groups(root->fs_info);
9325         }
9326
9327         reset_cached_block_groups(root->fs_info);
9328
9329         /*
9330          * We need to delete any duplicate entries we find first otherwise we
9331          * could mess up the extent tree when we have backrefs that actually
9332          * belong to a different extent item and not the weird duplicate one.
9333          */
9334         while (repair && !list_empty(&duplicate_extents)) {
9335                 rec = to_extent_record(duplicate_extents.next);
9336                 list_del_init(&rec->list);
9337
9338                 /* Sometimes we can find a backref before we find an actual
9339                  * extent, so we need to process it a little bit to see if there
9340                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9341                  * if this is a backref screwup.  If we need to delete stuff
9342                  * process_duplicates() will return 0, otherwise it will return
9343                  * 1 and we
9344                  */
9345                 if (process_duplicates(extent_cache, rec))
9346                         continue;
9347                 ret = delete_duplicate_records(root, rec);
9348                 if (ret < 0)
9349                         return ret;
9350                 /*
9351                  * delete_duplicate_records will return the number of entries
9352                  * deleted, so if it's greater than 0 then we know we actually
9353                  * did something and we need to remove.
9354                  */
9355                 if (ret)
9356                         had_dups = 1;
9357         }
9358
9359         if (had_dups)
9360                 return -EAGAIN;
9361
9362         while(1) {
9363                 int cur_err = 0;
9364                 int fix = 0;
9365
9366                 cache = search_cache_extent(extent_cache, 0);
9367                 if (!cache)
9368                         break;
9369                 rec = container_of(cache, struct extent_record, cache);
9370                 if (rec->num_duplicates) {
9371                         fprintf(stderr, "extent item %llu has multiple extent "
9372                                 "items\n", (unsigned long long)rec->start);
9373                         cur_err = 1;
9374                 }
9375
9376                 if (rec->refs != rec->extent_item_refs) {
9377                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9378                                 (unsigned long long)rec->start,
9379                                 (unsigned long long)rec->nr);
9380                         fprintf(stderr, "extent item %llu, found %llu\n",
9381                                 (unsigned long long)rec->extent_item_refs,
9382                                 (unsigned long long)rec->refs);
9383                         ret = record_orphan_data_extents(root->fs_info, rec);
9384                         if (ret < 0)
9385                                 goto repair_abort;
9386                         fix = ret;
9387                         cur_err = 1;
9388                 }
9389                 if (all_backpointers_checked(rec, 1)) {
9390                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9391                                 (unsigned long long)rec->start,
9392                                 (unsigned long long)rec->nr);
9393                         fix = 1;
9394                         cur_err = 1;
9395                 }
9396                 if (!rec->owner_ref_checked) {
9397                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9398                                 (unsigned long long)rec->start,
9399                                 (unsigned long long)rec->nr);
9400                         fix = 1;
9401                         cur_err = 1;
9402                 }
9403
9404                 if (repair && fix) {
9405                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9406                         if (ret)
9407                                 goto repair_abort;
9408                 }
9409
9410
9411                 if (rec->bad_full_backref) {
9412                         fprintf(stderr, "bad full backref, on [%llu]\n",
9413                                 (unsigned long long)rec->start);
9414                         if (repair) {
9415                                 ret = fixup_extent_flags(root->fs_info, rec);
9416                                 if (ret)
9417                                         goto repair_abort;
9418                                 fix = 1;
9419                         }
9420                         cur_err = 1;
9421                 }
9422                 /*
9423                  * Although it's not a extent ref's problem, we reuse this
9424                  * routine for error reporting.
9425                  * No repair function yet.
9426                  */
9427                 if (rec->crossing_stripes) {
9428                         fprintf(stderr,
9429                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9430                                 rec->start, rec->start + rec->max_size);
9431                         cur_err = 1;
9432                 }
9433
9434                 if (rec->wrong_chunk_type) {
9435                         fprintf(stderr,
9436                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9437                                 rec->start, rec->start + rec->max_size);
9438                         cur_err = 1;
9439                 }
9440
9441                 remove_cache_extent(extent_cache, cache);
9442                 free_all_extent_backrefs(rec);
9443                 if (!init_extent_tree && repair && (!cur_err || fix))
9444                         clear_extent_dirty(root->fs_info->excluded_extents,
9445                                            rec->start,
9446                                            rec->start + rec->max_size - 1);
9447                 free(rec);
9448         }
9449 repair_abort:
9450         if (repair) {
9451                 if (ret && ret != -EAGAIN) {
9452                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9453                         exit(1);
9454                 } else if (!ret) {
9455                         struct btrfs_trans_handle *trans;
9456
9457                         root = root->fs_info->extent_root;
9458                         trans = btrfs_start_transaction(root, 1);
9459                         if (IS_ERR(trans)) {
9460                                 ret = PTR_ERR(trans);
9461                                 goto repair_abort;
9462                         }
9463
9464                         ret = btrfs_fix_block_accounting(trans, root);
9465                         if (ret)
9466                                 goto repair_abort;
9467                         ret = btrfs_commit_transaction(trans, root);
9468                         if (ret)
9469                                 goto repair_abort;
9470                 }
9471                 return ret;
9472         }
9473         return 0;
9474 }
9475
9476 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9477 {
9478         u64 stripe_size;
9479
9480         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9481                 stripe_size = length;
9482                 stripe_size /= num_stripes;
9483         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9484                 stripe_size = length * 2;
9485                 stripe_size /= num_stripes;
9486         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9487                 stripe_size = length;
9488                 stripe_size /= (num_stripes - 1);
9489         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9490                 stripe_size = length;
9491                 stripe_size /= (num_stripes - 2);
9492         } else {
9493                 stripe_size = length;
9494         }
9495         return stripe_size;
9496 }
9497
9498 /*
9499  * Check the chunk with its block group/dev list ref:
9500  * Return 0 if all refs seems valid.
9501  * Return 1 if part of refs seems valid, need later check for rebuild ref
9502  * like missing block group and needs to search extent tree to rebuild them.
9503  * Return -1 if essential refs are missing and unable to rebuild.
9504  */
9505 static int check_chunk_refs(struct chunk_record *chunk_rec,
9506                             struct block_group_tree *block_group_cache,
9507                             struct device_extent_tree *dev_extent_cache,
9508                             int silent)
9509 {
9510         struct cache_extent *block_group_item;
9511         struct block_group_record *block_group_rec;
9512         struct cache_extent *dev_extent_item;
9513         struct device_extent_record *dev_extent_rec;
9514         u64 devid;
9515         u64 offset;
9516         u64 length;
9517         int metadump_v2 = 0;
9518         int i;
9519         int ret = 0;
9520
9521         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9522                                                chunk_rec->offset,
9523                                                chunk_rec->length);
9524         if (block_group_item) {
9525                 block_group_rec = container_of(block_group_item,
9526                                                struct block_group_record,
9527                                                cache);
9528                 if (chunk_rec->length != block_group_rec->offset ||
9529                     chunk_rec->offset != block_group_rec->objectid ||
9530                     (!metadump_v2 &&
9531                      chunk_rec->type_flags != block_group_rec->flags)) {
9532                         if (!silent)
9533                                 fprintf(stderr,
9534                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9535                                         chunk_rec->objectid,
9536                                         chunk_rec->type,
9537                                         chunk_rec->offset,
9538                                         chunk_rec->length,
9539                                         chunk_rec->offset,
9540                                         chunk_rec->type_flags,
9541                                         block_group_rec->objectid,
9542                                         block_group_rec->type,
9543                                         block_group_rec->offset,
9544                                         block_group_rec->offset,
9545                                         block_group_rec->objectid,
9546                                         block_group_rec->flags);
9547                         ret = -1;
9548                 } else {
9549                         list_del_init(&block_group_rec->list);
9550                         chunk_rec->bg_rec = block_group_rec;
9551                 }
9552         } else {
9553                 if (!silent)
9554                         fprintf(stderr,
9555                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9556                                 chunk_rec->objectid,
9557                                 chunk_rec->type,
9558                                 chunk_rec->offset,
9559                                 chunk_rec->length,
9560                                 chunk_rec->offset,
9561                                 chunk_rec->type_flags);
9562                 ret = 1;
9563         }
9564
9565         if (metadump_v2)
9566                 return ret;
9567
9568         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9569                                     chunk_rec->num_stripes);
9570         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9571                 devid = chunk_rec->stripes[i].devid;
9572                 offset = chunk_rec->stripes[i].offset;
9573                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9574                                                        devid, offset, length);
9575                 if (dev_extent_item) {
9576                         dev_extent_rec = container_of(dev_extent_item,
9577                                                 struct device_extent_record,
9578                                                 cache);
9579                         if (dev_extent_rec->objectid != devid ||
9580                             dev_extent_rec->offset != offset ||
9581                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9582                             dev_extent_rec->length != length) {
9583                                 if (!silent)
9584                                         fprintf(stderr,
9585                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9586                                                 chunk_rec->objectid,
9587                                                 chunk_rec->type,
9588                                                 chunk_rec->offset,
9589                                                 chunk_rec->stripes[i].devid,
9590                                                 chunk_rec->stripes[i].offset,
9591                                                 dev_extent_rec->objectid,
9592                                                 dev_extent_rec->offset,
9593                                                 dev_extent_rec->length);
9594                                 ret = -1;
9595                         } else {
9596                                 list_move(&dev_extent_rec->chunk_list,
9597                                           &chunk_rec->dextents);
9598                         }
9599                 } else {
9600                         if (!silent)
9601                                 fprintf(stderr,
9602                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9603                                         chunk_rec->objectid,
9604                                         chunk_rec->type,
9605                                         chunk_rec->offset,
9606                                         chunk_rec->stripes[i].devid,
9607                                         chunk_rec->stripes[i].offset);
9608                         ret = -1;
9609                 }
9610         }
9611         return ret;
9612 }
9613
9614 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9615 int check_chunks(struct cache_tree *chunk_cache,
9616                  struct block_group_tree *block_group_cache,
9617                  struct device_extent_tree *dev_extent_cache,
9618                  struct list_head *good, struct list_head *bad,
9619                  struct list_head *rebuild, int silent)
9620 {
9621         struct cache_extent *chunk_item;
9622         struct chunk_record *chunk_rec;
9623         struct block_group_record *bg_rec;
9624         struct device_extent_record *dext_rec;
9625         int err;
9626         int ret = 0;
9627
9628         chunk_item = first_cache_extent(chunk_cache);
9629         while (chunk_item) {
9630                 chunk_rec = container_of(chunk_item, struct chunk_record,
9631                                          cache);
9632                 err = check_chunk_refs(chunk_rec, block_group_cache,
9633                                        dev_extent_cache, silent);
9634                 if (err < 0)
9635                         ret = err;
9636                 if (err == 0 && good)
9637                         list_add_tail(&chunk_rec->list, good);
9638                 if (err > 0 && rebuild)
9639                         list_add_tail(&chunk_rec->list, rebuild);
9640                 if (err < 0 && bad)
9641                         list_add_tail(&chunk_rec->list, bad);
9642                 chunk_item = next_cache_extent(chunk_item);
9643         }
9644
9645         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9646                 if (!silent)
9647                         fprintf(stderr,
9648                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9649                                 bg_rec->objectid,
9650                                 bg_rec->offset,
9651                                 bg_rec->flags);
9652                 if (!ret)
9653                         ret = 1;
9654         }
9655
9656         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9657                             chunk_list) {
9658                 if (!silent)
9659                         fprintf(stderr,
9660                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9661                                 dext_rec->objectid,
9662                                 dext_rec->offset,
9663                                 dext_rec->length);
9664                 if (!ret)
9665                         ret = 1;
9666         }
9667         return ret;
9668 }
9669
9670
9671 static int check_device_used(struct device_record *dev_rec,
9672                              struct device_extent_tree *dext_cache)
9673 {
9674         struct cache_extent *cache;
9675         struct device_extent_record *dev_extent_rec;
9676         u64 total_byte = 0;
9677
9678         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9679         while (cache) {
9680                 dev_extent_rec = container_of(cache,
9681                                               struct device_extent_record,
9682                                               cache);
9683                 if (dev_extent_rec->objectid != dev_rec->devid)
9684                         break;
9685
9686                 list_del_init(&dev_extent_rec->device_list);
9687                 total_byte += dev_extent_rec->length;
9688                 cache = next_cache_extent(cache);
9689         }
9690
9691         if (total_byte != dev_rec->byte_used) {
9692                 fprintf(stderr,
9693                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9694                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9695                         dev_rec->type, dev_rec->offset);
9696                 return -1;
9697         } else {
9698                 return 0;
9699         }
9700 }
9701
9702 /* check btrfs_dev_item -> btrfs_dev_extent */
9703 static int check_devices(struct rb_root *dev_cache,
9704                          struct device_extent_tree *dev_extent_cache)
9705 {
9706         struct rb_node *dev_node;
9707         struct device_record *dev_rec;
9708         struct device_extent_record *dext_rec;
9709         int err;
9710         int ret = 0;
9711
9712         dev_node = rb_first(dev_cache);
9713         while (dev_node) {
9714                 dev_rec = container_of(dev_node, struct device_record, node);
9715                 err = check_device_used(dev_rec, dev_extent_cache);
9716                 if (err)
9717                         ret = err;
9718
9719                 dev_node = rb_next(dev_node);
9720         }
9721         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9722                             device_list) {
9723                 fprintf(stderr,
9724                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9725                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9726                 if (!ret)
9727                         ret = 1;
9728         }
9729         return ret;
9730 }
9731
9732 static int add_root_item_to_list(struct list_head *head,
9733                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9734                                   u8 level, u8 drop_level,
9735                                   struct btrfs_key *drop_key)
9736 {
9737
9738         struct root_item_record *ri_rec;
9739         ri_rec = malloc(sizeof(*ri_rec));
9740         if (!ri_rec)
9741                 return -ENOMEM;
9742         ri_rec->bytenr = bytenr;
9743         ri_rec->objectid = objectid;
9744         ri_rec->level = level;
9745         ri_rec->drop_level = drop_level;
9746         ri_rec->last_snapshot = last_snapshot;
9747         if (drop_key)
9748                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9749         list_add_tail(&ri_rec->list, head);
9750
9751         return 0;
9752 }
9753
9754 static void free_root_item_list(struct list_head *list)
9755 {
9756         struct root_item_record *ri_rec;
9757
9758         while (!list_empty(list)) {
9759                 ri_rec = list_first_entry(list, struct root_item_record,
9760                                           list);
9761                 list_del_init(&ri_rec->list);
9762                 free(ri_rec);
9763         }
9764 }
9765
9766 static int deal_root_from_list(struct list_head *list,
9767                                struct btrfs_root *root,
9768                                struct block_info *bits,
9769                                int bits_nr,
9770                                struct cache_tree *pending,
9771                                struct cache_tree *seen,
9772                                struct cache_tree *reada,
9773                                struct cache_tree *nodes,
9774                                struct cache_tree *extent_cache,
9775                                struct cache_tree *chunk_cache,
9776                                struct rb_root *dev_cache,
9777                                struct block_group_tree *block_group_cache,
9778                                struct device_extent_tree *dev_extent_cache)
9779 {
9780         int ret = 0;
9781         u64 last;
9782
9783         while (!list_empty(list)) {
9784                 struct root_item_record *rec;
9785                 struct extent_buffer *buf;
9786                 rec = list_entry(list->next,
9787                                  struct root_item_record, list);
9788                 last = 0;
9789                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9790                 if (!extent_buffer_uptodate(buf)) {
9791                         free_extent_buffer(buf);
9792                         ret = -EIO;
9793                         break;
9794                 }
9795                 ret = add_root_to_pending(buf, extent_cache, pending,
9796                                     seen, nodes, rec->objectid);
9797                 if (ret < 0)
9798                         break;
9799                 /*
9800                  * To rebuild extent tree, we need deal with snapshot
9801                  * one by one, otherwise we deal with node firstly which
9802                  * can maximize readahead.
9803                  */
9804                 while (1) {
9805                         ret = run_next_block(root, bits, bits_nr, &last,
9806                                              pending, seen, reada, nodes,
9807                                              extent_cache, chunk_cache,
9808                                              dev_cache, block_group_cache,
9809                                              dev_extent_cache, rec);
9810                         if (ret != 0)
9811                                 break;
9812                 }
9813                 free_extent_buffer(buf);
9814                 list_del(&rec->list);
9815                 free(rec);
9816                 if (ret < 0)
9817                         break;
9818         }
9819         while (ret >= 0) {
9820                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9821                                      reada, nodes, extent_cache, chunk_cache,
9822                                      dev_cache, block_group_cache,
9823                                      dev_extent_cache, NULL);
9824                 if (ret != 0) {
9825                         if (ret > 0)
9826                                 ret = 0;
9827                         break;
9828                 }
9829         }
9830         return ret;
9831 }
9832
9833 static int check_chunks_and_extents(struct btrfs_root *root)
9834 {
9835         struct rb_root dev_cache;
9836         struct cache_tree chunk_cache;
9837         struct block_group_tree block_group_cache;
9838         struct device_extent_tree dev_extent_cache;
9839         struct cache_tree extent_cache;
9840         struct cache_tree seen;
9841         struct cache_tree pending;
9842         struct cache_tree reada;
9843         struct cache_tree nodes;
9844         struct extent_io_tree excluded_extents;
9845         struct cache_tree corrupt_blocks;
9846         struct btrfs_path path;
9847         struct btrfs_key key;
9848         struct btrfs_key found_key;
9849         int ret, err = 0;
9850         struct block_info *bits;
9851         int bits_nr;
9852         struct extent_buffer *leaf;
9853         int slot;
9854         struct btrfs_root_item ri;
9855         struct list_head dropping_trees;
9856         struct list_head normal_trees;
9857         struct btrfs_root *root1;
9858         u64 objectid;
9859         u8 level;
9860
9861         dev_cache = RB_ROOT;
9862         cache_tree_init(&chunk_cache);
9863         block_group_tree_init(&block_group_cache);
9864         device_extent_tree_init(&dev_extent_cache);
9865
9866         cache_tree_init(&extent_cache);
9867         cache_tree_init(&seen);
9868         cache_tree_init(&pending);
9869         cache_tree_init(&nodes);
9870         cache_tree_init(&reada);
9871         cache_tree_init(&corrupt_blocks);
9872         extent_io_tree_init(&excluded_extents);
9873         INIT_LIST_HEAD(&dropping_trees);
9874         INIT_LIST_HEAD(&normal_trees);
9875
9876         if (repair) {
9877                 root->fs_info->excluded_extents = &excluded_extents;
9878                 root->fs_info->fsck_extent_cache = &extent_cache;
9879                 root->fs_info->free_extent_hook = free_extent_hook;
9880                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9881         }
9882
9883         bits_nr = 1024;
9884         bits = malloc(bits_nr * sizeof(struct block_info));
9885         if (!bits) {
9886                 perror("malloc");
9887                 exit(1);
9888         }
9889
9890         if (ctx.progress_enabled) {
9891                 ctx.tp = TASK_EXTENTS;
9892                 task_start(ctx.info);
9893         }
9894
9895 again:
9896         root1 = root->fs_info->tree_root;
9897         level = btrfs_header_level(root1->node);
9898         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9899                                     root1->node->start, 0, level, 0, NULL);
9900         if (ret < 0)
9901                 goto out;
9902         root1 = root->fs_info->chunk_root;
9903         level = btrfs_header_level(root1->node);
9904         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9905                                     root1->node->start, 0, level, 0, NULL);
9906         if (ret < 0)
9907                 goto out;
9908         btrfs_init_path(&path);
9909         key.offset = 0;
9910         key.objectid = 0;
9911         key.type = BTRFS_ROOT_ITEM_KEY;
9912         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9913                                         &key, &path, 0, 0);
9914         if (ret < 0)
9915                 goto out;
9916         while(1) {
9917                 leaf = path.nodes[0];
9918                 slot = path.slots[0];
9919                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9920                         ret = btrfs_next_leaf(root, &path);
9921                         if (ret != 0)
9922                                 break;
9923                         leaf = path.nodes[0];
9924                         slot = path.slots[0];
9925                 }
9926                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9927                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9928                         unsigned long offset;
9929                         u64 last_snapshot;
9930
9931                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9932                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9933                         last_snapshot = btrfs_root_last_snapshot(&ri);
9934                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9935                                 level = btrfs_root_level(&ri);
9936                                 ret = add_root_item_to_list(&normal_trees,
9937                                                 found_key.objectid,
9938                                                 btrfs_root_bytenr(&ri),
9939                                                 last_snapshot, level,
9940                                                 0, NULL);
9941                                 if (ret < 0)
9942                                         goto out;
9943                         } else {
9944                                 level = btrfs_root_level(&ri);
9945                                 objectid = found_key.objectid;
9946                                 btrfs_disk_key_to_cpu(&found_key,
9947                                                       &ri.drop_progress);
9948                                 ret = add_root_item_to_list(&dropping_trees,
9949                                                 objectid,
9950                                                 btrfs_root_bytenr(&ri),
9951                                                 last_snapshot, level,
9952                                                 ri.drop_level, &found_key);
9953                                 if (ret < 0)
9954                                         goto out;
9955                         }
9956                 }
9957                 path.slots[0]++;
9958         }
9959         btrfs_release_path(&path);
9960
9961         /*
9962          * check_block can return -EAGAIN if it fixes something, please keep
9963          * this in mind when dealing with return values from these functions, if
9964          * we get -EAGAIN we want to fall through and restart the loop.
9965          */
9966         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9967                                   &seen, &reada, &nodes, &extent_cache,
9968                                   &chunk_cache, &dev_cache, &block_group_cache,
9969                                   &dev_extent_cache);
9970         if (ret < 0) {
9971                 if (ret == -EAGAIN)
9972                         goto loop;
9973                 goto out;
9974         }
9975         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9976                                   &pending, &seen, &reada, &nodes,
9977                                   &extent_cache, &chunk_cache, &dev_cache,
9978                                   &block_group_cache, &dev_extent_cache);
9979         if (ret < 0) {
9980                 if (ret == -EAGAIN)
9981                         goto loop;
9982                 goto out;
9983         }
9984
9985         ret = check_chunks(&chunk_cache, &block_group_cache,
9986                            &dev_extent_cache, NULL, NULL, NULL, 0);
9987         if (ret) {
9988                 if (ret == -EAGAIN)
9989                         goto loop;
9990                 err = ret;
9991         }
9992
9993         ret = check_extent_refs(root, &extent_cache);
9994         if (ret < 0) {
9995                 if (ret == -EAGAIN)
9996                         goto loop;
9997                 goto out;
9998         }
9999
10000         ret = check_devices(&dev_cache, &dev_extent_cache);
10001         if (ret && err)
10002                 ret = err;
10003
10004 out:
10005         task_stop(ctx.info);
10006         if (repair) {
10007                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10008                 extent_io_tree_cleanup(&excluded_extents);
10009                 root->fs_info->fsck_extent_cache = NULL;
10010                 root->fs_info->free_extent_hook = NULL;
10011                 root->fs_info->corrupt_blocks = NULL;
10012                 root->fs_info->excluded_extents = NULL;
10013         }
10014         free(bits);
10015         free_chunk_cache_tree(&chunk_cache);
10016         free_device_cache_tree(&dev_cache);
10017         free_block_group_tree(&block_group_cache);
10018         free_device_extent_tree(&dev_extent_cache);
10019         free_extent_cache_tree(&seen);
10020         free_extent_cache_tree(&pending);
10021         free_extent_cache_tree(&reada);
10022         free_extent_cache_tree(&nodes);
10023         free_root_item_list(&normal_trees);
10024         free_root_item_list(&dropping_trees);
10025         return ret;
10026 loop:
10027         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10028         free_extent_cache_tree(&seen);
10029         free_extent_cache_tree(&pending);
10030         free_extent_cache_tree(&reada);
10031         free_extent_cache_tree(&nodes);
10032         free_chunk_cache_tree(&chunk_cache);
10033         free_block_group_tree(&block_group_cache);
10034         free_device_cache_tree(&dev_cache);
10035         free_device_extent_tree(&dev_extent_cache);
10036         free_extent_record_cache(&extent_cache);
10037         free_root_item_list(&normal_trees);
10038         free_root_item_list(&dropping_trees);
10039         extent_io_tree_cleanup(&excluded_extents);
10040         goto again;
10041 }
10042
10043 /*
10044  * Check backrefs of a tree block given by @bytenr or @eb.
10045  *
10046  * @root:       the root containing the @bytenr or @eb
10047  * @eb:         tree block extent buffer, can be NULL
10048  * @bytenr:     bytenr of the tree block to search
10049  * @level:      tree level of the tree block
10050  * @owner:      owner of the tree block
10051  *
10052  * Return >0 for any error found and output error message
10053  * Return 0 for no error found
10054  */
10055 static int check_tree_block_ref(struct btrfs_root *root,
10056                                 struct extent_buffer *eb, u64 bytenr,
10057                                 int level, u64 owner)
10058 {
10059         struct btrfs_key key;
10060         struct btrfs_root *extent_root = root->fs_info->extent_root;
10061         struct btrfs_path path;
10062         struct btrfs_extent_item *ei;
10063         struct btrfs_extent_inline_ref *iref;
10064         struct extent_buffer *leaf;
10065         unsigned long end;
10066         unsigned long ptr;
10067         int slot;
10068         int skinny_level;
10069         int type;
10070         u32 nodesize = root->fs_info->nodesize;
10071         u32 item_size;
10072         u64 offset;
10073         int tree_reloc_root = 0;
10074         int found_ref = 0;
10075         int err = 0;
10076         int ret;
10077
10078         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10079             btrfs_header_bytenr(root->node) == bytenr)
10080                 tree_reloc_root = 1;
10081
10082         btrfs_init_path(&path);
10083         key.objectid = bytenr;
10084         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10085                 key.type = BTRFS_METADATA_ITEM_KEY;
10086         else
10087                 key.type = BTRFS_EXTENT_ITEM_KEY;
10088         key.offset = (u64)-1;
10089
10090         /* Search for the backref in extent tree */
10091         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10092         if (ret < 0) {
10093                 err |= BACKREF_MISSING;
10094                 goto out;
10095         }
10096         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10097         if (ret) {
10098                 err |= BACKREF_MISSING;
10099                 goto out;
10100         }
10101
10102         leaf = path.nodes[0];
10103         slot = path.slots[0];
10104         btrfs_item_key_to_cpu(leaf, &key, slot);
10105
10106         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10107
10108         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10109                 skinny_level = (int)key.offset;
10110                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10111         } else {
10112                 struct btrfs_tree_block_info *info;
10113
10114                 info = (struct btrfs_tree_block_info *)(ei + 1);
10115                 skinny_level = btrfs_tree_block_level(leaf, info);
10116                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10117         }
10118
10119         if (eb) {
10120                 u64 header_gen;
10121                 u64 extent_gen;
10122
10123                 if (!(btrfs_extent_flags(leaf, ei) &
10124                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10125                         error(
10126                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10127                                 key.objectid, nodesize,
10128                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10129                         err = BACKREF_MISMATCH;
10130                 }
10131                 header_gen = btrfs_header_generation(eb);
10132                 extent_gen = btrfs_extent_generation(leaf, ei);
10133                 if (header_gen != extent_gen) {
10134                         error(
10135         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10136                                 key.objectid, nodesize, header_gen,
10137                                 extent_gen);
10138                         err = BACKREF_MISMATCH;
10139                 }
10140                 if (level != skinny_level) {
10141                         error(
10142                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10143                                 key.objectid, nodesize, level, skinny_level);
10144                         err = BACKREF_MISMATCH;
10145                 }
10146                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10147                         error(
10148                         "extent[%llu %u] is referred by other roots than %llu",
10149                                 key.objectid, nodesize, root->objectid);
10150                         err = BACKREF_MISMATCH;
10151                 }
10152         }
10153
10154         /*
10155          * Iterate the extent/metadata item to find the exact backref
10156          */
10157         item_size = btrfs_item_size_nr(leaf, slot);
10158         ptr = (unsigned long)iref;
10159         end = (unsigned long)ei + item_size;
10160         while (ptr < end) {
10161                 iref = (struct btrfs_extent_inline_ref *)ptr;
10162                 type = btrfs_extent_inline_ref_type(leaf, iref);
10163                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10164
10165                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10166                         (offset == root->objectid || offset == owner)) {
10167                         found_ref = 1;
10168                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10169                         /*
10170                          * Backref of tree reloc root points to itself, no need
10171                          * to check backref any more.
10172                          */
10173                         if (tree_reloc_root)
10174                                 found_ref = 1;
10175                         else
10176                         /* Check if the backref points to valid referencer */
10177                                 found_ref = !check_tree_block_ref(root, NULL,
10178                                                 offset, level + 1, owner);
10179                 }
10180
10181                 if (found_ref)
10182                         break;
10183                 ptr += btrfs_extent_inline_ref_size(type);
10184         }
10185
10186         /*
10187          * Inlined extent item doesn't have what we need, check
10188          * TREE_BLOCK_REF_KEY
10189          */
10190         if (!found_ref) {
10191                 btrfs_release_path(&path);
10192                 key.objectid = bytenr;
10193                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10194                 key.offset = root->objectid;
10195
10196                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10197                 if (!ret)
10198                         found_ref = 1;
10199         }
10200         if (!found_ref)
10201                 err |= BACKREF_MISSING;
10202 out:
10203         btrfs_release_path(&path);
10204         if (eb && (err & BACKREF_MISSING))
10205                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10206                         bytenr, nodesize, owner, level);
10207         return err;
10208 }
10209
10210 /*
10211  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10212  *
10213  * Return >0 any error found and output error message
10214  * Return 0 for no error found
10215  */
10216 static int check_extent_data_item(struct btrfs_root *root,
10217                                   struct extent_buffer *eb, int slot)
10218 {
10219         struct btrfs_file_extent_item *fi;
10220         struct btrfs_path path;
10221         struct btrfs_root *extent_root = root->fs_info->extent_root;
10222         struct btrfs_key fi_key;
10223         struct btrfs_key dbref_key;
10224         struct extent_buffer *leaf;
10225         struct btrfs_extent_item *ei;
10226         struct btrfs_extent_inline_ref *iref;
10227         struct btrfs_extent_data_ref *dref;
10228         u64 owner;
10229         u64 disk_bytenr;
10230         u64 disk_num_bytes;
10231         u64 extent_num_bytes;
10232         u64 extent_flags;
10233         u32 item_size;
10234         unsigned long end;
10235         unsigned long ptr;
10236         int type;
10237         u64 ref_root;
10238         int found_dbackref = 0;
10239         int err = 0;
10240         int ret;
10241
10242         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10243         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10244
10245         /* Nothing to check for hole and inline data extents */
10246         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10247             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10248                 return 0;
10249
10250         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10251         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10252         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10253
10254         /* Check unaligned disk_num_bytes and num_bytes */
10255         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10256                 error(
10257 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10258                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10259                         root->fs_info->sectorsize);
10260                 err |= BYTES_UNALIGNED;
10261         } else {
10262                 data_bytes_allocated += disk_num_bytes;
10263         }
10264         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10265                 error(
10266 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10267                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10268                         root->fs_info->sectorsize);
10269                 err |= BYTES_UNALIGNED;
10270         } else {
10271                 data_bytes_referenced += extent_num_bytes;
10272         }
10273         owner = btrfs_header_owner(eb);
10274
10275         /* Check the extent item of the file extent in extent tree */
10276         btrfs_init_path(&path);
10277         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10278         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10279         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10280
10281         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10282         if (ret)
10283                 goto out;
10284
10285         leaf = path.nodes[0];
10286         slot = path.slots[0];
10287         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10288
10289         extent_flags = btrfs_extent_flags(leaf, ei);
10290
10291         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10292                 error(
10293                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10294                     disk_bytenr, disk_num_bytes,
10295                     BTRFS_EXTENT_FLAG_DATA);
10296                 err |= BACKREF_MISMATCH;
10297         }
10298
10299         /* Check data backref inside that extent item */
10300         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10301         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10302         ptr = (unsigned long)iref;
10303         end = (unsigned long)ei + item_size;
10304         while (ptr < end) {
10305                 iref = (struct btrfs_extent_inline_ref *)ptr;
10306                 type = btrfs_extent_inline_ref_type(leaf, iref);
10307                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10308
10309                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10310                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10311                         if (ref_root == owner || ref_root == root->objectid)
10312                                 found_dbackref = 1;
10313                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10314                         found_dbackref = !check_tree_block_ref(root, NULL,
10315                                 btrfs_extent_inline_ref_offset(leaf, iref),
10316                                 0, owner);
10317                 }
10318
10319                 if (found_dbackref)
10320                         break;
10321                 ptr += btrfs_extent_inline_ref_size(type);
10322         }
10323
10324         if (!found_dbackref) {
10325                 btrfs_release_path(&path);
10326
10327                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10328                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10329                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10330                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10331                                 fi_key.objectid, fi_key.offset);
10332
10333                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10334                                         &dbref_key, &path, 0, 0);
10335                 if (!ret) {
10336                         found_dbackref = 1;
10337                         goto out;
10338                 }
10339
10340                 btrfs_release_path(&path);
10341
10342                 /*
10343                  * Neither inlined nor EXTENT_DATA_REF found, try
10344                  * SHARED_DATA_REF as last chance.
10345                  */
10346                 dbref_key.objectid = disk_bytenr;
10347                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10348                 dbref_key.offset = eb->start;
10349
10350                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10351                                         &dbref_key, &path, 0, 0);
10352                 if (!ret) {
10353                         found_dbackref = 1;
10354                         goto out;
10355                 }
10356         }
10357
10358 out:
10359         if (!found_dbackref)
10360                 err |= BACKREF_MISSING;
10361         btrfs_release_path(&path);
10362         if (err & BACKREF_MISSING) {
10363                 error("data extent[%llu %llu] backref lost",
10364                       disk_bytenr, disk_num_bytes);
10365         }
10366         return err;
10367 }
10368
10369 /*
10370  * Get real tree block level for the case like shared block
10371  * Return >= 0 as tree level
10372  * Return <0 for error
10373  */
10374 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10375 {
10376         struct extent_buffer *eb;
10377         struct btrfs_path path;
10378         struct btrfs_key key;
10379         struct btrfs_extent_item *ei;
10380         u64 flags;
10381         u64 transid;
10382         u8 backref_level;
10383         u8 header_level;
10384         int ret;
10385
10386         /* Search extent tree for extent generation and level */
10387         key.objectid = bytenr;
10388         key.type = BTRFS_METADATA_ITEM_KEY;
10389         key.offset = (u64)-1;
10390
10391         btrfs_init_path(&path);
10392         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10393         if (ret < 0)
10394                 goto release_out;
10395         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10396         if (ret < 0)
10397                 goto release_out;
10398         if (ret > 0) {
10399                 ret = -ENOENT;
10400                 goto release_out;
10401         }
10402
10403         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10404         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10405                             struct btrfs_extent_item);
10406         flags = btrfs_extent_flags(path.nodes[0], ei);
10407         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10408                 ret = -ENOENT;
10409                 goto release_out;
10410         }
10411
10412         /* Get transid for later read_tree_block() check */
10413         transid = btrfs_extent_generation(path.nodes[0], ei);
10414
10415         /* Get backref level as one source */
10416         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10417                 backref_level = key.offset;
10418         } else {
10419                 struct btrfs_tree_block_info *info;
10420
10421                 info = (struct btrfs_tree_block_info *)(ei + 1);
10422                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10423         }
10424         btrfs_release_path(&path);
10425
10426         /* Get level from tree block as an alternative source */
10427         eb = read_tree_block(fs_info, bytenr, transid);
10428         if (!extent_buffer_uptodate(eb)) {
10429                 free_extent_buffer(eb);
10430                 return -EIO;
10431         }
10432         header_level = btrfs_header_level(eb);
10433         free_extent_buffer(eb);
10434
10435         if (header_level != backref_level)
10436                 return -EIO;
10437         return header_level;
10438
10439 release_out:
10440         btrfs_release_path(&path);
10441         return ret;
10442 }
10443
10444 /*
10445  * Check if a tree block backref is valid (points to a valid tree block)
10446  * if level == -1, level will be resolved
10447  * Return >0 for any error found and print error message
10448  */
10449 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10450                                     u64 bytenr, int level)
10451 {
10452         struct btrfs_root *root;
10453         struct btrfs_key key;
10454         struct btrfs_path path;
10455         struct extent_buffer *eb;
10456         struct extent_buffer *node;
10457         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10458         int err = 0;
10459         int ret;
10460
10461         /* Query level for level == -1 special case */
10462         if (level == -1)
10463                 level = query_tree_block_level(fs_info, bytenr);
10464         if (level < 0) {
10465                 err |= REFERENCER_MISSING;
10466                 goto out;
10467         }
10468
10469         key.objectid = root_id;
10470         key.type = BTRFS_ROOT_ITEM_KEY;
10471         key.offset = (u64)-1;
10472
10473         root = btrfs_read_fs_root(fs_info, &key);
10474         if (IS_ERR(root)) {
10475                 err |= REFERENCER_MISSING;
10476                 goto out;
10477         }
10478
10479         /* Read out the tree block to get item/node key */
10480         eb = read_tree_block(fs_info, bytenr, 0);
10481         if (!extent_buffer_uptodate(eb)) {
10482                 err |= REFERENCER_MISSING;
10483                 free_extent_buffer(eb);
10484                 goto out;
10485         }
10486
10487         /* Empty tree, no need to check key */
10488         if (!btrfs_header_nritems(eb) && !level) {
10489                 free_extent_buffer(eb);
10490                 goto out;
10491         }
10492
10493         if (level)
10494                 btrfs_node_key_to_cpu(eb, &key, 0);
10495         else
10496                 btrfs_item_key_to_cpu(eb, &key, 0);
10497
10498         free_extent_buffer(eb);
10499
10500         btrfs_init_path(&path);
10501         path.lowest_level = level;
10502         /* Search with the first key, to ensure we can reach it */
10503         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10504         if (ret < 0) {
10505                 err |= REFERENCER_MISSING;
10506                 goto release_out;
10507         }
10508
10509         node = path.nodes[level];
10510         if (btrfs_header_bytenr(node) != bytenr) {
10511                 error(
10512         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10513                         bytenr, nodesize, bytenr,
10514                         btrfs_header_bytenr(node));
10515                 err |= REFERENCER_MISMATCH;
10516         }
10517         if (btrfs_header_level(node) != level) {
10518                 error(
10519         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10520                         bytenr, nodesize, level,
10521                         btrfs_header_level(node));
10522                 err |= REFERENCER_MISMATCH;
10523         }
10524
10525 release_out:
10526         btrfs_release_path(&path);
10527 out:
10528         if (err & REFERENCER_MISSING) {
10529                 if (level < 0)
10530                         error("extent [%llu %d] lost referencer (owner: %llu)",
10531                                 bytenr, nodesize, root_id);
10532                 else
10533                         error(
10534                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10535                                 bytenr, nodesize, root_id, level);
10536         }
10537
10538         return err;
10539 }
10540
10541 /*
10542  * Check if tree block @eb is tree reloc root.
10543  * Return 0 if it's not or any problem happens
10544  * Return 1 if it's a tree reloc root
10545  */
10546 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10547                                  struct extent_buffer *eb)
10548 {
10549         struct btrfs_root *tree_reloc_root;
10550         struct btrfs_key key;
10551         u64 bytenr = btrfs_header_bytenr(eb);
10552         u64 owner = btrfs_header_owner(eb);
10553         int ret = 0;
10554
10555         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10556         key.offset = owner;
10557         key.type = BTRFS_ROOT_ITEM_KEY;
10558
10559         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10560         if (IS_ERR(tree_reloc_root))
10561                 return 0;
10562
10563         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10564                 ret = 1;
10565         btrfs_free_fs_root(tree_reloc_root);
10566         return ret;
10567 }
10568
10569 /*
10570  * Check referencer for shared block backref
10571  * If level == -1, this function will resolve the level.
10572  */
10573 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10574                                      u64 parent, u64 bytenr, int level)
10575 {
10576         struct extent_buffer *eb;
10577         u32 nr;
10578         int found_parent = 0;
10579         int i;
10580
10581         eb = read_tree_block(fs_info, parent, 0);
10582         if (!extent_buffer_uptodate(eb))
10583                 goto out;
10584
10585         if (level == -1)
10586                 level = query_tree_block_level(fs_info, bytenr);
10587         if (level < 0)
10588                 goto out;
10589
10590         /* It's possible it's a tree reloc root */
10591         if (parent == bytenr) {
10592                 if (is_tree_reloc_root(fs_info, eb))
10593                         found_parent = 1;
10594                 goto out;
10595         }
10596
10597         if (level + 1 != btrfs_header_level(eb))
10598                 goto out;
10599
10600         nr = btrfs_header_nritems(eb);
10601         for (i = 0; i < nr; i++) {
10602                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10603                         found_parent = 1;
10604                         break;
10605                 }
10606         }
10607 out:
10608         free_extent_buffer(eb);
10609         if (!found_parent) {
10610                 error(
10611         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10612                         bytenr, fs_info->nodesize, parent, level);
10613                 return REFERENCER_MISSING;
10614         }
10615         return 0;
10616 }
10617
10618 /*
10619  * Check referencer for normal (inlined) data ref
10620  * If len == 0, it will be resolved by searching in extent tree
10621  */
10622 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10623                                      u64 root_id, u64 objectid, u64 offset,
10624                                      u64 bytenr, u64 len, u32 count)
10625 {
10626         struct btrfs_root *root;
10627         struct btrfs_root *extent_root = fs_info->extent_root;
10628         struct btrfs_key key;
10629         struct btrfs_path path;
10630         struct extent_buffer *leaf;
10631         struct btrfs_file_extent_item *fi;
10632         u32 found_count = 0;
10633         int slot;
10634         int ret = 0;
10635
10636         if (!len) {
10637                 key.objectid = bytenr;
10638                 key.type = BTRFS_EXTENT_ITEM_KEY;
10639                 key.offset = (u64)-1;
10640
10641                 btrfs_init_path(&path);
10642                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10643                 if (ret < 0)
10644                         goto out;
10645                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10646                 if (ret)
10647                         goto out;
10648                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10649                 if (key.objectid != bytenr ||
10650                     key.type != BTRFS_EXTENT_ITEM_KEY)
10651                         goto out;
10652                 len = key.offset;
10653                 btrfs_release_path(&path);
10654         }
10655         key.objectid = root_id;
10656         key.type = BTRFS_ROOT_ITEM_KEY;
10657         key.offset = (u64)-1;
10658         btrfs_init_path(&path);
10659
10660         root = btrfs_read_fs_root(fs_info, &key);
10661         if (IS_ERR(root))
10662                 goto out;
10663
10664         key.objectid = objectid;
10665         key.type = BTRFS_EXTENT_DATA_KEY;
10666         /*
10667          * It can be nasty as data backref offset is
10668          * file offset - file extent offset, which is smaller or
10669          * equal to original backref offset.  The only special case is
10670          * overflow.  So we need to special check and do further search.
10671          */
10672         key.offset = offset & (1ULL << 63) ? 0 : offset;
10673
10674         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10675         if (ret < 0)
10676                 goto out;
10677
10678         /*
10679          * Search afterwards to get correct one
10680          * NOTE: As we must do a comprehensive check on the data backref to
10681          * make sure the dref count also matches, we must iterate all file
10682          * extents for that inode.
10683          */
10684         while (1) {
10685                 leaf = path.nodes[0];
10686                 slot = path.slots[0];
10687
10688                 if (slot >= btrfs_header_nritems(leaf))
10689                         goto next;
10690                 btrfs_item_key_to_cpu(leaf, &key, slot);
10691                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10692                         break;
10693                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10694                 /*
10695                  * Except normal disk bytenr and disk num bytes, we still
10696                  * need to do extra check on dbackref offset as
10697                  * dbackref offset = file_offset - file_extent_offset
10698                  */
10699                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10700                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10701                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10702                     offset)
10703                         found_count++;
10704
10705 next:
10706                 ret = btrfs_next_item(root, &path);
10707                 if (ret)
10708                         break;
10709         }
10710 out:
10711         btrfs_release_path(&path);
10712         if (found_count != count) {
10713                 error(
10714 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10715                         bytenr, len, root_id, objectid, offset, count, found_count);
10716                 return REFERENCER_MISSING;
10717         }
10718         return 0;
10719 }
10720
10721 /*
10722  * Check if the referencer of a shared data backref exists
10723  */
10724 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10725                                      u64 parent, u64 bytenr)
10726 {
10727         struct extent_buffer *eb;
10728         struct btrfs_key key;
10729         struct btrfs_file_extent_item *fi;
10730         u32 nr;
10731         int found_parent = 0;
10732         int i;
10733
10734         eb = read_tree_block(fs_info, parent, 0);
10735         if (!extent_buffer_uptodate(eb))
10736                 goto out;
10737
10738         nr = btrfs_header_nritems(eb);
10739         for (i = 0; i < nr; i++) {
10740                 btrfs_item_key_to_cpu(eb, &key, i);
10741                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10742                         continue;
10743
10744                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10745                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10746                         continue;
10747
10748                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10749                         found_parent = 1;
10750                         break;
10751                 }
10752         }
10753
10754 out:
10755         free_extent_buffer(eb);
10756         if (!found_parent) {
10757                 error("shared extent %llu referencer lost (parent: %llu)",
10758                         bytenr, parent);
10759                 return REFERENCER_MISSING;
10760         }
10761         return 0;
10762 }
10763
10764 /*
10765  * This function will check a given extent item, including its backref and
10766  * itself (like crossing stripe boundary and type)
10767  *
10768  * Since we don't use extent_record anymore, introduce new error bit
10769  */
10770 static int check_extent_item(struct btrfs_fs_info *fs_info,
10771                              struct extent_buffer *eb, int slot)
10772 {
10773         struct btrfs_extent_item *ei;
10774         struct btrfs_extent_inline_ref *iref;
10775         struct btrfs_extent_data_ref *dref;
10776         unsigned long end;
10777         unsigned long ptr;
10778         int type;
10779         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10780         u32 item_size = btrfs_item_size_nr(eb, slot);
10781         u64 flags;
10782         u64 offset;
10783         int metadata = 0;
10784         int level;
10785         struct btrfs_key key;
10786         int ret;
10787         int err = 0;
10788
10789         btrfs_item_key_to_cpu(eb, &key, slot);
10790         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10791                 bytes_used += key.offset;
10792         else
10793                 bytes_used += nodesize;
10794
10795         if (item_size < sizeof(*ei)) {
10796                 /*
10797                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10798                  * old thing when on disk format is still un-determined.
10799                  * No need to care about it anymore
10800                  */
10801                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10802                 return -ENOTTY;
10803         }
10804
10805         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10806         flags = btrfs_extent_flags(eb, ei);
10807
10808         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10809                 metadata = 1;
10810         if (metadata && check_crossing_stripes(global_info, key.objectid,
10811                                                eb->len)) {
10812                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10813                       key.objectid, key.objectid + nodesize);
10814                 err |= CROSSING_STRIPE_BOUNDARY;
10815         }
10816
10817         ptr = (unsigned long)(ei + 1);
10818
10819         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10820                 /* Old EXTENT_ITEM metadata */
10821                 struct btrfs_tree_block_info *info;
10822
10823                 info = (struct btrfs_tree_block_info *)ptr;
10824                 level = btrfs_tree_block_level(eb, info);
10825                 ptr += sizeof(struct btrfs_tree_block_info);
10826         } else {
10827                 /* New METADATA_ITEM */
10828                 level = key.offset;
10829         }
10830         end = (unsigned long)ei + item_size;
10831
10832 next:
10833         /* Reached extent item end normally */
10834         if (ptr == end)
10835                 goto out;
10836
10837         /* Beyond extent item end, wrong item size */
10838         if (ptr > end) {
10839                 err |= ITEM_SIZE_MISMATCH;
10840                 error("extent item at bytenr %llu slot %d has wrong size",
10841                         eb->start, slot);
10842                 goto out;
10843         }
10844
10845         /* Now check every backref in this extent item */
10846         iref = (struct btrfs_extent_inline_ref *)ptr;
10847         type = btrfs_extent_inline_ref_type(eb, iref);
10848         offset = btrfs_extent_inline_ref_offset(eb, iref);
10849         switch (type) {
10850         case BTRFS_TREE_BLOCK_REF_KEY:
10851                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10852                                                level);
10853                 err |= ret;
10854                 break;
10855         case BTRFS_SHARED_BLOCK_REF_KEY:
10856                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10857                                                  level);
10858                 err |= ret;
10859                 break;
10860         case BTRFS_EXTENT_DATA_REF_KEY:
10861                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10862                 ret = check_extent_data_backref(fs_info,
10863                                 btrfs_extent_data_ref_root(eb, dref),
10864                                 btrfs_extent_data_ref_objectid(eb, dref),
10865                                 btrfs_extent_data_ref_offset(eb, dref),
10866                                 key.objectid, key.offset,
10867                                 btrfs_extent_data_ref_count(eb, dref));
10868                 err |= ret;
10869                 break;
10870         case BTRFS_SHARED_DATA_REF_KEY:
10871                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10872                 err |= ret;
10873                 break;
10874         default:
10875                 error("extent[%llu %d %llu] has unknown ref type: %d",
10876                         key.objectid, key.type, key.offset, type);
10877                 err |= UNKNOWN_TYPE;
10878                 goto out;
10879         }
10880
10881         ptr += btrfs_extent_inline_ref_size(type);
10882         goto next;
10883
10884 out:
10885         return err;
10886 }
10887
10888 /*
10889  * Check if a dev extent item is referred correctly by its chunk
10890  */
10891 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10892                                  struct extent_buffer *eb, int slot)
10893 {
10894         struct btrfs_root *chunk_root = fs_info->chunk_root;
10895         struct btrfs_dev_extent *ptr;
10896         struct btrfs_path path;
10897         struct btrfs_key chunk_key;
10898         struct btrfs_key devext_key;
10899         struct btrfs_chunk *chunk;
10900         struct extent_buffer *l;
10901         int num_stripes;
10902         u64 length;
10903         int i;
10904         int found_chunk = 0;
10905         int ret;
10906
10907         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10908         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10909         length = btrfs_dev_extent_length(eb, ptr);
10910
10911         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10912         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10913         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10914
10915         btrfs_init_path(&path);
10916         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10917         if (ret)
10918                 goto out;
10919
10920         l = path.nodes[0];
10921         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10922         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10923                                       chunk_key.offset);
10924         if (ret < 0)
10925                 goto out;
10926
10927         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10928                 goto out;
10929
10930         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10931         for (i = 0; i < num_stripes; i++) {
10932                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10933                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10934
10935                 if (devid == devext_key.objectid &&
10936                     offset == devext_key.offset) {
10937                         found_chunk = 1;
10938                         break;
10939                 }
10940         }
10941 out:
10942         btrfs_release_path(&path);
10943         if (!found_chunk) {
10944                 error(
10945                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10946                         devext_key.objectid, devext_key.offset, length);
10947                 return REFERENCER_MISSING;
10948         }
10949         return 0;
10950 }
10951
10952 /*
10953  * Check if the used space is correct with the dev item
10954  */
10955 static int check_dev_item(struct btrfs_fs_info *fs_info,
10956                           struct extent_buffer *eb, int slot)
10957 {
10958         struct btrfs_root *dev_root = fs_info->dev_root;
10959         struct btrfs_dev_item *dev_item;
10960         struct btrfs_path path;
10961         struct btrfs_key key;
10962         struct btrfs_dev_extent *ptr;
10963         u64 dev_id;
10964         u64 used;
10965         u64 total = 0;
10966         int ret;
10967
10968         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10969         dev_id = btrfs_device_id(eb, dev_item);
10970         used = btrfs_device_bytes_used(eb, dev_item);
10971
10972         key.objectid = dev_id;
10973         key.type = BTRFS_DEV_EXTENT_KEY;
10974         key.offset = 0;
10975
10976         btrfs_init_path(&path);
10977         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10978         if (ret < 0) {
10979                 btrfs_item_key_to_cpu(eb, &key, slot);
10980                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10981                         key.objectid, key.type, key.offset);
10982                 btrfs_release_path(&path);
10983                 return REFERENCER_MISSING;
10984         }
10985
10986         /* Iterate dev_extents to calculate the used space of a device */
10987         while (1) {
10988                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10989                         goto next;
10990
10991                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10992                 if (key.objectid > dev_id)
10993                         break;
10994                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10995                         goto next;
10996
10997                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10998                                      struct btrfs_dev_extent);
10999                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11000 next:
11001                 ret = btrfs_next_item(dev_root, &path);
11002                 if (ret)
11003                         break;
11004         }
11005         btrfs_release_path(&path);
11006
11007         if (used != total) {
11008                 btrfs_item_key_to_cpu(eb, &key, slot);
11009                 error(
11010 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11011                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11012                         BTRFS_DEV_EXTENT_KEY, dev_id);
11013                 return ACCOUNTING_MISMATCH;
11014         }
11015         return 0;
11016 }
11017
11018 /*
11019  * Check a block group item with its referener (chunk) and its used space
11020  * with extent/metadata item
11021  */
11022 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11023                                   struct extent_buffer *eb, int slot)
11024 {
11025         struct btrfs_root *extent_root = fs_info->extent_root;
11026         struct btrfs_root *chunk_root = fs_info->chunk_root;
11027         struct btrfs_block_group_item *bi;
11028         struct btrfs_block_group_item bg_item;
11029         struct btrfs_path path;
11030         struct btrfs_key bg_key;
11031         struct btrfs_key chunk_key;
11032         struct btrfs_key extent_key;
11033         struct btrfs_chunk *chunk;
11034         struct extent_buffer *leaf;
11035         struct btrfs_extent_item *ei;
11036         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11037         u64 flags;
11038         u64 bg_flags;
11039         u64 used;
11040         u64 total = 0;
11041         int ret;
11042         int err = 0;
11043
11044         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11045         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11046         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11047         used = btrfs_block_group_used(&bg_item);
11048         bg_flags = btrfs_block_group_flags(&bg_item);
11049
11050         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11051         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11052         chunk_key.offset = bg_key.objectid;
11053
11054         btrfs_init_path(&path);
11055         /* Search for the referencer chunk */
11056         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11057         if (ret) {
11058                 error(
11059                 "block group[%llu %llu] did not find the related chunk item",
11060                         bg_key.objectid, bg_key.offset);
11061                 err |= REFERENCER_MISSING;
11062         } else {
11063                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11064                                         struct btrfs_chunk);
11065                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11066                                                 bg_key.offset) {
11067                         error(
11068         "block group[%llu %llu] related chunk item length does not match",
11069                                 bg_key.objectid, bg_key.offset);
11070                         err |= REFERENCER_MISMATCH;
11071                 }
11072         }
11073         btrfs_release_path(&path);
11074
11075         /* Search from the block group bytenr */
11076         extent_key.objectid = bg_key.objectid;
11077         extent_key.type = 0;
11078         extent_key.offset = 0;
11079
11080         btrfs_init_path(&path);
11081         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11082         if (ret < 0)
11083                 goto out;
11084
11085         /* Iterate extent tree to account used space */
11086         while (1) {
11087                 leaf = path.nodes[0];
11088
11089                 /* Search slot can point to the last item beyond leaf nritems */
11090                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11091                         goto next;
11092
11093                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11094                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11095                         break;
11096
11097                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11098                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11099                         goto next;
11100                 if (extent_key.objectid < bg_key.objectid)
11101                         goto next;
11102
11103                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11104                         total += nodesize;
11105                 else
11106                         total += extent_key.offset;
11107
11108                 ei = btrfs_item_ptr(leaf, path.slots[0],
11109                                     struct btrfs_extent_item);
11110                 flags = btrfs_extent_flags(leaf, ei);
11111                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11112                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11113                                 error(
11114                         "bad extent[%llu, %llu) type mismatch with chunk",
11115                                         extent_key.objectid,
11116                                         extent_key.objectid + extent_key.offset);
11117                                 err |= CHUNK_TYPE_MISMATCH;
11118                         }
11119                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11120                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11121                                     BTRFS_BLOCK_GROUP_METADATA))) {
11122                                 error(
11123                         "bad extent[%llu, %llu) type mismatch with chunk",
11124                                         extent_key.objectid,
11125                                         extent_key.objectid + nodesize);
11126                                 err |= CHUNK_TYPE_MISMATCH;
11127                         }
11128                 }
11129 next:
11130                 ret = btrfs_next_item(extent_root, &path);
11131                 if (ret)
11132                         break;
11133         }
11134
11135 out:
11136         btrfs_release_path(&path);
11137
11138         if (total != used) {
11139                 error(
11140                 "block group[%llu %llu] used %llu but extent items used %llu",
11141                         bg_key.objectid, bg_key.offset, used, total);
11142                 err |= ACCOUNTING_MISMATCH;
11143         }
11144         return err;
11145 }
11146
11147 /*
11148  * Check a chunk item.
11149  * Including checking all referred dev_extents and block group
11150  */
11151 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11152                             struct extent_buffer *eb, int slot)
11153 {
11154         struct btrfs_root *extent_root = fs_info->extent_root;
11155         struct btrfs_root *dev_root = fs_info->dev_root;
11156         struct btrfs_path path;
11157         struct btrfs_key chunk_key;
11158         struct btrfs_key bg_key;
11159         struct btrfs_key devext_key;
11160         struct btrfs_chunk *chunk;
11161         struct extent_buffer *leaf;
11162         struct btrfs_block_group_item *bi;
11163         struct btrfs_block_group_item bg_item;
11164         struct btrfs_dev_extent *ptr;
11165         u64 length;
11166         u64 chunk_end;
11167         u64 stripe_len;
11168         u64 type;
11169         int num_stripes;
11170         u64 offset;
11171         u64 objectid;
11172         int i;
11173         int ret;
11174         int err = 0;
11175
11176         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11177         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11178         length = btrfs_chunk_length(eb, chunk);
11179         chunk_end = chunk_key.offset + length;
11180         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11181                                       chunk_key.offset);
11182         if (ret < 0) {
11183                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11184                         chunk_end);
11185                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11186                 goto out;
11187         }
11188         type = btrfs_chunk_type(eb, chunk);
11189
11190         bg_key.objectid = chunk_key.offset;
11191         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11192         bg_key.offset = length;
11193
11194         btrfs_init_path(&path);
11195         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11196         if (ret) {
11197                 error(
11198                 "chunk[%llu %llu) did not find the related block group item",
11199                         chunk_key.offset, chunk_end);
11200                 err |= REFERENCER_MISSING;
11201         } else{
11202                 leaf = path.nodes[0];
11203                 bi = btrfs_item_ptr(leaf, path.slots[0],
11204                                     struct btrfs_block_group_item);
11205                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11206                                    sizeof(bg_item));
11207                 if (btrfs_block_group_flags(&bg_item) != type) {
11208                         error(
11209 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11210                                 chunk_key.offset, chunk_end, type,
11211                                 btrfs_block_group_flags(&bg_item));
11212                         err |= REFERENCER_MISSING;
11213                 }
11214         }
11215
11216         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11217         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11218         for (i = 0; i < num_stripes; i++) {
11219                 btrfs_release_path(&path);
11220                 btrfs_init_path(&path);
11221                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11222                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11223                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11224
11225                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11226                                         0, 0);
11227                 if (ret)
11228                         goto not_match_dev;
11229
11230                 leaf = path.nodes[0];
11231                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11232                                      struct btrfs_dev_extent);
11233                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11234                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11235                 if (objectid != chunk_key.objectid ||
11236                     offset != chunk_key.offset ||
11237                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11238                         goto not_match_dev;
11239                 continue;
11240 not_match_dev:
11241                 err |= BACKREF_MISSING;
11242                 error(
11243                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11244                         chunk_key.objectid, chunk_end, i);
11245                 continue;
11246         }
11247         btrfs_release_path(&path);
11248 out:
11249         return err;
11250 }
11251
11252 /*
11253  * Main entry function to check known items and update related accounting info
11254  */
11255 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11256 {
11257         struct btrfs_fs_info *fs_info = root->fs_info;
11258         struct btrfs_key key;
11259         int slot = 0;
11260         int type;
11261         struct btrfs_extent_data_ref *dref;
11262         int ret;
11263         int err = 0;
11264
11265 next:
11266         btrfs_item_key_to_cpu(eb, &key, slot);
11267         type = key.type;
11268
11269         switch (type) {
11270         case BTRFS_EXTENT_DATA_KEY:
11271                 ret = check_extent_data_item(root, eb, slot);
11272                 err |= ret;
11273                 break;
11274         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11275                 ret = check_block_group_item(fs_info, eb, slot);
11276                 err |= ret;
11277                 break;
11278         case BTRFS_DEV_ITEM_KEY:
11279                 ret = check_dev_item(fs_info, eb, slot);
11280                 err |= ret;
11281                 break;
11282         case BTRFS_CHUNK_ITEM_KEY:
11283                 ret = check_chunk_item(fs_info, eb, slot);
11284                 err |= ret;
11285                 break;
11286         case BTRFS_DEV_EXTENT_KEY:
11287                 ret = check_dev_extent_item(fs_info, eb, slot);
11288                 err |= ret;
11289                 break;
11290         case BTRFS_EXTENT_ITEM_KEY:
11291         case BTRFS_METADATA_ITEM_KEY:
11292                 ret = check_extent_item(fs_info, eb, slot);
11293                 err |= ret;
11294                 break;
11295         case BTRFS_EXTENT_CSUM_KEY:
11296                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11297                 break;
11298         case BTRFS_TREE_BLOCK_REF_KEY:
11299                 ret = check_tree_block_backref(fs_info, key.offset,
11300                                                key.objectid, -1);
11301                 err |= ret;
11302                 break;
11303         case BTRFS_EXTENT_DATA_REF_KEY:
11304                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11305                 ret = check_extent_data_backref(fs_info,
11306                                 btrfs_extent_data_ref_root(eb, dref),
11307                                 btrfs_extent_data_ref_objectid(eb, dref),
11308                                 btrfs_extent_data_ref_offset(eb, dref),
11309                                 key.objectid, 0,
11310                                 btrfs_extent_data_ref_count(eb, dref));
11311                 err |= ret;
11312                 break;
11313         case BTRFS_SHARED_BLOCK_REF_KEY:
11314                 ret = check_shared_block_backref(fs_info, key.offset,
11315                                                  key.objectid, -1);
11316                 err |= ret;
11317                 break;
11318         case BTRFS_SHARED_DATA_REF_KEY:
11319                 ret = check_shared_data_backref(fs_info, key.offset,
11320                                                 key.objectid);
11321                 err |= ret;
11322                 break;
11323         default:
11324                 break;
11325         }
11326
11327         if (++slot < btrfs_header_nritems(eb))
11328                 goto next;
11329
11330         return err;
11331 }
11332
11333 /*
11334  * Helper function for later fs/subvol tree check.  To determine if a tree
11335  * block should be checked.
11336  * This function will ensure only the direct referencer with lowest rootid to
11337  * check a fs/subvolume tree block.
11338  *
11339  * Backref check at extent tree would detect errors like missing subvolume
11340  * tree, so we can do aggressive check to reduce duplicated checks.
11341  */
11342 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11343 {
11344         struct btrfs_root *extent_root = root->fs_info->extent_root;
11345         struct btrfs_key key;
11346         struct btrfs_path path;
11347         struct extent_buffer *leaf;
11348         int slot;
11349         struct btrfs_extent_item *ei;
11350         unsigned long ptr;
11351         unsigned long end;
11352         int type;
11353         u32 item_size;
11354         u64 offset;
11355         struct btrfs_extent_inline_ref *iref;
11356         int ret;
11357
11358         btrfs_init_path(&path);
11359         key.objectid = btrfs_header_bytenr(eb);
11360         key.type = BTRFS_METADATA_ITEM_KEY;
11361         key.offset = (u64)-1;
11362
11363         /*
11364          * Any failure in backref resolving means we can't determine
11365          * whom the tree block belongs to.
11366          * So in that case, we need to check that tree block
11367          */
11368         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11369         if (ret < 0)
11370                 goto need_check;
11371
11372         ret = btrfs_previous_extent_item(extent_root, &path,
11373                                          btrfs_header_bytenr(eb));
11374         if (ret)
11375                 goto need_check;
11376
11377         leaf = path.nodes[0];
11378         slot = path.slots[0];
11379         btrfs_item_key_to_cpu(leaf, &key, slot);
11380         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11381
11382         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11383                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11384         } else {
11385                 struct btrfs_tree_block_info *info;
11386
11387                 info = (struct btrfs_tree_block_info *)(ei + 1);
11388                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11389         }
11390
11391         item_size = btrfs_item_size_nr(leaf, slot);
11392         ptr = (unsigned long)iref;
11393         end = (unsigned long)ei + item_size;
11394         while (ptr < end) {
11395                 iref = (struct btrfs_extent_inline_ref *)ptr;
11396                 type = btrfs_extent_inline_ref_type(leaf, iref);
11397                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11398
11399                 /*
11400                  * We only check the tree block if current root is
11401                  * the lowest referencer of it.
11402                  */
11403                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11404                     offset < root->objectid) {
11405                         btrfs_release_path(&path);
11406                         return 0;
11407                 }
11408
11409                 ptr += btrfs_extent_inline_ref_size(type);
11410         }
11411         /*
11412          * Normally we should also check keyed tree block ref, but that may be
11413          * very time consuming.  Inlined ref should already make us skip a lot
11414          * of refs now.  So skip search keyed tree block ref.
11415          */
11416
11417 need_check:
11418         btrfs_release_path(&path);
11419         return 1;
11420 }
11421
11422 /*
11423  * Traversal function for tree block. We will do:
11424  * 1) Skip shared fs/subvolume tree blocks
11425  * 2) Update related bytes accounting
11426  * 3) Pre-order traversal
11427  */
11428 static int traverse_tree_block(struct btrfs_root *root,
11429                                 struct extent_buffer *node)
11430 {
11431         struct extent_buffer *eb;
11432         struct btrfs_key key;
11433         struct btrfs_key drop_key;
11434         int level;
11435         u64 nr;
11436         int i;
11437         int err = 0;
11438         int ret;
11439
11440         /*
11441          * Skip shared fs/subvolume tree block, in that case they will
11442          * be checked by referencer with lowest rootid
11443          */
11444         if (is_fstree(root->objectid) && !should_check(root, node))
11445                 return 0;
11446
11447         /* Update bytes accounting */
11448         total_btree_bytes += node->len;
11449         if (fs_root_objectid(btrfs_header_owner(node)))
11450                 total_fs_tree_bytes += node->len;
11451         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11452                 total_extent_tree_bytes += node->len;
11453
11454         /* pre-order tranversal, check itself first */
11455         level = btrfs_header_level(node);
11456         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11457                                    btrfs_header_level(node),
11458                                    btrfs_header_owner(node));
11459         err |= ret;
11460         if (err)
11461                 error(
11462         "check %s failed root %llu bytenr %llu level %d, force continue check",
11463                         level ? "node":"leaf", root->objectid,
11464                         btrfs_header_bytenr(node), btrfs_header_level(node));
11465
11466         if (!level) {
11467                 btree_space_waste += btrfs_leaf_free_space(root, node);
11468                 ret = check_leaf_items(root, node);
11469                 err |= ret;
11470                 return err;
11471         }
11472
11473         nr = btrfs_header_nritems(node);
11474         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11475         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11476                 sizeof(struct btrfs_key_ptr);
11477
11478         /* Then check all its children */
11479         for (i = 0; i < nr; i++) {
11480                 u64 blocknr = btrfs_node_blockptr(node, i);
11481
11482                 btrfs_node_key_to_cpu(node, &key, i);
11483                 if (level == root->root_item.drop_level &&
11484                     is_dropped_key(&key, &drop_key))
11485                         continue;
11486
11487                 /*
11488                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11489                  * to call the function itself.
11490                  */
11491                 eb = read_tree_block(root->fs_info, blocknr, 0);
11492                 if (extent_buffer_uptodate(eb)) {
11493                         ret = traverse_tree_block(root, eb);
11494                         err |= ret;
11495                 }
11496                 free_extent_buffer(eb);
11497         }
11498
11499         return err;
11500 }
11501
11502 /*
11503  * Low memory usage version check_chunks_and_extents.
11504  */
11505 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11506 {
11507         struct btrfs_path path;
11508         struct btrfs_key key;
11509         struct btrfs_root *root1;
11510         struct btrfs_root *cur_root;
11511         int err = 0;
11512         int ret;
11513
11514         root1 = root->fs_info->chunk_root;
11515         ret = traverse_tree_block(root1, root1->node);
11516         err |= ret;
11517
11518         root1 = root->fs_info->tree_root;
11519         ret = traverse_tree_block(root1, root1->node);
11520         err |= ret;
11521
11522         btrfs_init_path(&path);
11523         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11524         key.offset = 0;
11525         key.type = BTRFS_ROOT_ITEM_KEY;
11526
11527         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11528         if (ret) {
11529                 error("cannot find extent treet in tree_root");
11530                 goto out;
11531         }
11532
11533         while (1) {
11534                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11535                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11536                         goto next;
11537                 key.offset = (u64)-1;
11538
11539                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11540                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11541                                         &key);
11542                 else
11543                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11544                 if (IS_ERR(cur_root) || !cur_root) {
11545                         error("failed to read tree: %lld", key.objectid);
11546                         goto next;
11547                 }
11548
11549                 ret = traverse_tree_block(cur_root, cur_root->node);
11550                 err |= ret;
11551
11552                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11553                         btrfs_free_fs_root(cur_root);
11554 next:
11555                 ret = btrfs_next_item(root1, &path);
11556                 if (ret)
11557                         goto out;
11558         }
11559
11560 out:
11561         btrfs_release_path(&path);
11562         return err;
11563 }
11564
11565 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11566                            struct btrfs_root *root, int overwrite)
11567 {
11568         struct extent_buffer *c;
11569         struct extent_buffer *old = root->node;
11570         int level;
11571         int ret;
11572         struct btrfs_disk_key disk_key = {0,0,0};
11573
11574         level = 0;
11575
11576         if (overwrite) {
11577                 c = old;
11578                 extent_buffer_get(c);
11579                 goto init;
11580         }
11581         c = btrfs_alloc_free_block(trans, root,
11582                                    root->fs_info->nodesize,
11583                                    root->root_key.objectid,
11584                                    &disk_key, level, 0, 0);
11585         if (IS_ERR(c)) {
11586                 c = old;
11587                 extent_buffer_get(c);
11588                 overwrite = 1;
11589         }
11590 init:
11591         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11592         btrfs_set_header_level(c, level);
11593         btrfs_set_header_bytenr(c, c->start);
11594         btrfs_set_header_generation(c, trans->transid);
11595         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11596         btrfs_set_header_owner(c, root->root_key.objectid);
11597
11598         write_extent_buffer(c, root->fs_info->fsid,
11599                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11600
11601         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11602                             btrfs_header_chunk_tree_uuid(c),
11603                             BTRFS_UUID_SIZE);
11604
11605         btrfs_mark_buffer_dirty(c);
11606         /*
11607          * this case can happen in the following case:
11608          *
11609          * 1.overwrite previous root.
11610          *
11611          * 2.reinit reloc data root, this is because we skip pin
11612          * down reloc data tree before which means we can allocate
11613          * same block bytenr here.
11614          */
11615         if (old->start == c->start) {
11616                 btrfs_set_root_generation(&root->root_item,
11617                                           trans->transid);
11618                 root->root_item.level = btrfs_header_level(root->node);
11619                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11620                                         &root->root_key, &root->root_item);
11621                 if (ret) {
11622                         free_extent_buffer(c);
11623                         return ret;
11624                 }
11625         }
11626         free_extent_buffer(old);
11627         root->node = c;
11628         add_root_to_dirty_list(root);
11629         return 0;
11630 }
11631
11632 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11633                                 struct extent_buffer *eb, int tree_root)
11634 {
11635         struct extent_buffer *tmp;
11636         struct btrfs_root_item *ri;
11637         struct btrfs_key key;
11638         u64 bytenr;
11639         int level = btrfs_header_level(eb);
11640         int nritems;
11641         int ret;
11642         int i;
11643
11644         /*
11645          * If we have pinned this block before, don't pin it again.
11646          * This can not only avoid forever loop with broken filesystem
11647          * but also give us some speedups.
11648          */
11649         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11650                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11651                 return 0;
11652
11653         btrfs_pin_extent(fs_info, eb->start, eb->len);
11654
11655         nritems = btrfs_header_nritems(eb);
11656         for (i = 0; i < nritems; i++) {
11657                 if (level == 0) {
11658                         btrfs_item_key_to_cpu(eb, &key, i);
11659                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11660                                 continue;
11661                         /* Skip the extent root and reloc roots */
11662                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11663                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11664                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11665                                 continue;
11666                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11667                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11668
11669                         /*
11670                          * If at any point we start needing the real root we
11671                          * will have to build a stump root for the root we are
11672                          * in, but for now this doesn't actually use the root so
11673                          * just pass in extent_root.
11674                          */
11675                         tmp = read_tree_block(fs_info, bytenr, 0);
11676                         if (!extent_buffer_uptodate(tmp)) {
11677                                 fprintf(stderr, "Error reading root block\n");
11678                                 return -EIO;
11679                         }
11680                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11681                         free_extent_buffer(tmp);
11682                         if (ret)
11683                                 return ret;
11684                 } else {
11685                         bytenr = btrfs_node_blockptr(eb, i);
11686
11687                         /* If we aren't the tree root don't read the block */
11688                         if (level == 1 && !tree_root) {
11689                                 btrfs_pin_extent(fs_info, bytenr,
11690                                                 fs_info->nodesize);
11691                                 continue;
11692                         }
11693
11694                         tmp = read_tree_block(fs_info, bytenr, 0);
11695                         if (!extent_buffer_uptodate(tmp)) {
11696                                 fprintf(stderr, "Error reading tree block\n");
11697                                 return -EIO;
11698                         }
11699                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11700                         free_extent_buffer(tmp);
11701                         if (ret)
11702                                 return ret;
11703                 }
11704         }
11705
11706         return 0;
11707 }
11708
11709 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11710 {
11711         int ret;
11712
11713         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11714         if (ret)
11715                 return ret;
11716
11717         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11718 }
11719
11720 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11721 {
11722         struct btrfs_block_group_cache *cache;
11723         struct btrfs_path path;
11724         struct extent_buffer *leaf;
11725         struct btrfs_chunk *chunk;
11726         struct btrfs_key key;
11727         int ret;
11728         u64 start;
11729
11730         btrfs_init_path(&path);
11731         key.objectid = 0;
11732         key.type = BTRFS_CHUNK_ITEM_KEY;
11733         key.offset = 0;
11734         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11735         if (ret < 0) {
11736                 btrfs_release_path(&path);
11737                 return ret;
11738         }
11739
11740         /*
11741          * We do this in case the block groups were screwed up and had alloc
11742          * bits that aren't actually set on the chunks.  This happens with
11743          * restored images every time and could happen in real life I guess.
11744          */
11745         fs_info->avail_data_alloc_bits = 0;
11746         fs_info->avail_metadata_alloc_bits = 0;
11747         fs_info->avail_system_alloc_bits = 0;
11748
11749         /* First we need to create the in-memory block groups */
11750         while (1) {
11751                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11752                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11753                         if (ret < 0) {
11754                                 btrfs_release_path(&path);
11755                                 return ret;
11756                         }
11757                         if (ret) {
11758                                 ret = 0;
11759                                 break;
11760                         }
11761                 }
11762                 leaf = path.nodes[0];
11763                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11764                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11765                         path.slots[0]++;
11766                         continue;
11767                 }
11768
11769                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11770                 btrfs_add_block_group(fs_info, 0,
11771                                       btrfs_chunk_type(leaf, chunk),
11772                                       key.objectid, key.offset,
11773                                       btrfs_chunk_length(leaf, chunk));
11774                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11775                                  key.offset + btrfs_chunk_length(leaf, chunk));
11776                 path.slots[0]++;
11777         }
11778         start = 0;
11779         while (1) {
11780                 cache = btrfs_lookup_first_block_group(fs_info, start);
11781                 if (!cache)
11782                         break;
11783                 cache->cached = 1;
11784                 start = cache->key.objectid + cache->key.offset;
11785         }
11786
11787         btrfs_release_path(&path);
11788         return 0;
11789 }
11790
11791 static int reset_balance(struct btrfs_trans_handle *trans,
11792                          struct btrfs_fs_info *fs_info)
11793 {
11794         struct btrfs_root *root = fs_info->tree_root;
11795         struct btrfs_path path;
11796         struct extent_buffer *leaf;
11797         struct btrfs_key key;
11798         int del_slot, del_nr = 0;
11799         int ret;
11800         int found = 0;
11801
11802         btrfs_init_path(&path);
11803         key.objectid = BTRFS_BALANCE_OBJECTID;
11804         key.type = BTRFS_BALANCE_ITEM_KEY;
11805         key.offset = 0;
11806         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11807         if (ret) {
11808                 if (ret > 0)
11809                         ret = 0;
11810                 if (!ret)
11811                         goto reinit_data_reloc;
11812                 else
11813                         goto out;
11814         }
11815
11816         ret = btrfs_del_item(trans, root, &path);
11817         if (ret)
11818                 goto out;
11819         btrfs_release_path(&path);
11820
11821         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11822         key.type = BTRFS_ROOT_ITEM_KEY;
11823         key.offset = 0;
11824         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11825         if (ret < 0)
11826                 goto out;
11827         while (1) {
11828                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11829                         if (!found)
11830                                 break;
11831
11832                         if (del_nr) {
11833                                 ret = btrfs_del_items(trans, root, &path,
11834                                                       del_slot, del_nr);
11835                                 del_nr = 0;
11836                                 if (ret)
11837                                         goto out;
11838                         }
11839                         key.offset++;
11840                         btrfs_release_path(&path);
11841
11842                         found = 0;
11843                         ret = btrfs_search_slot(trans, root, &key, &path,
11844                                                 -1, 1);
11845                         if (ret < 0)
11846                                 goto out;
11847                         continue;
11848                 }
11849                 found = 1;
11850                 leaf = path.nodes[0];
11851                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11852                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11853                         break;
11854                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11855                         path.slots[0]++;
11856                         continue;
11857                 }
11858                 if (!del_nr) {
11859                         del_slot = path.slots[0];
11860                         del_nr = 1;
11861                 } else {
11862                         del_nr++;
11863                 }
11864                 path.slots[0]++;
11865         }
11866
11867         if (del_nr) {
11868                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11869                 if (ret)
11870                         goto out;
11871         }
11872         btrfs_release_path(&path);
11873
11874 reinit_data_reloc:
11875         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11876         key.type = BTRFS_ROOT_ITEM_KEY;
11877         key.offset = (u64)-1;
11878         root = btrfs_read_fs_root(fs_info, &key);
11879         if (IS_ERR(root)) {
11880                 fprintf(stderr, "Error reading data reloc tree\n");
11881                 ret = PTR_ERR(root);
11882                 goto out;
11883         }
11884         record_root_in_trans(trans, root);
11885         ret = btrfs_fsck_reinit_root(trans, root, 0);
11886         if (ret)
11887                 goto out;
11888         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11889 out:
11890         btrfs_release_path(&path);
11891         return ret;
11892 }
11893
11894 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11895                               struct btrfs_fs_info *fs_info)
11896 {
11897         u64 start = 0;
11898         int ret;
11899
11900         /*
11901          * The only reason we don't do this is because right now we're just
11902          * walking the trees we find and pinning down their bytes, we don't look
11903          * at any of the leaves.  In order to do mixed groups we'd have to check
11904          * the leaves of any fs roots and pin down the bytes for any file
11905          * extents we find.  Not hard but why do it if we don't have to?
11906          */
11907         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11908                 fprintf(stderr, "We don't support re-initing the extent tree "
11909                         "for mixed block groups yet, please notify a btrfs "
11910                         "developer you want to do this so they can add this "
11911                         "functionality.\n");
11912                 return -EINVAL;
11913         }
11914
11915         /*
11916          * first we need to walk all of the trees except the extent tree and pin
11917          * down the bytes that are in use so we don't overwrite any existing
11918          * metadata.
11919          */
11920         ret = pin_metadata_blocks(fs_info);
11921         if (ret) {
11922                 fprintf(stderr, "error pinning down used bytes\n");
11923                 return ret;
11924         }
11925
11926         /*
11927          * Need to drop all the block groups since we're going to recreate all
11928          * of them again.
11929          */
11930         btrfs_free_block_groups(fs_info);
11931         ret = reset_block_groups(fs_info);
11932         if (ret) {
11933                 fprintf(stderr, "error resetting the block groups\n");
11934                 return ret;
11935         }
11936
11937         /* Ok we can allocate now, reinit the extent root */
11938         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11939         if (ret) {
11940                 fprintf(stderr, "extent root initialization failed\n");
11941                 /*
11942                  * When the transaction code is updated we should end the
11943                  * transaction, but for now progs only knows about commit so
11944                  * just return an error.
11945                  */
11946                 return ret;
11947         }
11948
11949         /*
11950          * Now we have all the in-memory block groups setup so we can make
11951          * allocations properly, and the metadata we care about is safe since we
11952          * pinned all of it above.
11953          */
11954         while (1) {
11955                 struct btrfs_block_group_cache *cache;
11956
11957                 cache = btrfs_lookup_first_block_group(fs_info, start);
11958                 if (!cache)
11959                         break;
11960                 start = cache->key.objectid + cache->key.offset;
11961                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11962                                         &cache->key, &cache->item,
11963                                         sizeof(cache->item));
11964                 if (ret) {
11965                         fprintf(stderr, "Error adding block group\n");
11966                         return ret;
11967                 }
11968                 btrfs_extent_post_op(trans, fs_info->extent_root);
11969         }
11970
11971         ret = reset_balance(trans, fs_info);
11972         if (ret)
11973                 fprintf(stderr, "error resetting the pending balance\n");
11974
11975         return ret;
11976 }
11977
11978 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11979 {
11980         struct btrfs_path path;
11981         struct btrfs_trans_handle *trans;
11982         struct btrfs_key key;
11983         int ret;
11984
11985         printf("Recowing metadata block %llu\n", eb->start);
11986         key.objectid = btrfs_header_owner(eb);
11987         key.type = BTRFS_ROOT_ITEM_KEY;
11988         key.offset = (u64)-1;
11989
11990         root = btrfs_read_fs_root(root->fs_info, &key);
11991         if (IS_ERR(root)) {
11992                 fprintf(stderr, "Couldn't find owner root %llu\n",
11993                         key.objectid);
11994                 return PTR_ERR(root);
11995         }
11996
11997         trans = btrfs_start_transaction(root, 1);
11998         if (IS_ERR(trans))
11999                 return PTR_ERR(trans);
12000
12001         btrfs_init_path(&path);
12002         path.lowest_level = btrfs_header_level(eb);
12003         if (path.lowest_level)
12004                 btrfs_node_key_to_cpu(eb, &key, 0);
12005         else
12006                 btrfs_item_key_to_cpu(eb, &key, 0);
12007
12008         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12009         btrfs_commit_transaction(trans, root);
12010         btrfs_release_path(&path);
12011         return ret;
12012 }
12013
12014 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12015 {
12016         struct btrfs_path path;
12017         struct btrfs_trans_handle *trans;
12018         struct btrfs_key key;
12019         int ret;
12020
12021         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12022                bad->key.type, bad->key.offset);
12023         key.objectid = bad->root_id;
12024         key.type = BTRFS_ROOT_ITEM_KEY;
12025         key.offset = (u64)-1;
12026
12027         root = btrfs_read_fs_root(root->fs_info, &key);
12028         if (IS_ERR(root)) {
12029                 fprintf(stderr, "Couldn't find owner root %llu\n",
12030                         key.objectid);
12031                 return PTR_ERR(root);
12032         }
12033
12034         trans = btrfs_start_transaction(root, 1);
12035         if (IS_ERR(trans))
12036                 return PTR_ERR(trans);
12037
12038         btrfs_init_path(&path);
12039         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12040         if (ret) {
12041                 if (ret > 0)
12042                         ret = 0;
12043                 goto out;
12044         }
12045         ret = btrfs_del_item(trans, root, &path);
12046 out:
12047         btrfs_commit_transaction(trans, root);
12048         btrfs_release_path(&path);
12049         return ret;
12050 }
12051
12052 static int zero_log_tree(struct btrfs_root *root)
12053 {
12054         struct btrfs_trans_handle *trans;
12055         int ret;
12056
12057         trans = btrfs_start_transaction(root, 1);
12058         if (IS_ERR(trans)) {
12059                 ret = PTR_ERR(trans);
12060                 return ret;
12061         }
12062         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12063         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12064         ret = btrfs_commit_transaction(trans, root);
12065         return ret;
12066 }
12067
12068 static int populate_csum(struct btrfs_trans_handle *trans,
12069                          struct btrfs_root *csum_root, char *buf, u64 start,
12070                          u64 len)
12071 {
12072         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12073         u64 offset = 0;
12074         u64 sectorsize;
12075         int ret = 0;
12076
12077         while (offset < len) {
12078                 sectorsize = fs_info->sectorsize;
12079                 ret = read_extent_data(fs_info, buf, start + offset,
12080                                        &sectorsize, 0);
12081                 if (ret)
12082                         break;
12083                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12084                                             start + offset, buf, sectorsize);
12085                 if (ret)
12086                         break;
12087                 offset += sectorsize;
12088         }
12089         return ret;
12090 }
12091
12092 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12093                                       struct btrfs_root *csum_root,
12094                                       struct btrfs_root *cur_root)
12095 {
12096         struct btrfs_path path;
12097         struct btrfs_key key;
12098         struct extent_buffer *node;
12099         struct btrfs_file_extent_item *fi;
12100         char *buf = NULL;
12101         u64 start = 0;
12102         u64 len = 0;
12103         int slot = 0;
12104         int ret = 0;
12105
12106         buf = malloc(cur_root->fs_info->sectorsize);
12107         if (!buf)
12108                 return -ENOMEM;
12109
12110         btrfs_init_path(&path);
12111         key.objectid = 0;
12112         key.offset = 0;
12113         key.type = 0;
12114         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12115         if (ret < 0)
12116                 goto out;
12117         /* Iterate all regular file extents and fill its csum */
12118         while (1) {
12119                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12120
12121                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12122                         goto next;
12123                 node = path.nodes[0];
12124                 slot = path.slots[0];
12125                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12126                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12127                         goto next;
12128                 start = btrfs_file_extent_disk_bytenr(node, fi);
12129                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12130
12131                 ret = populate_csum(trans, csum_root, buf, start, len);
12132                 if (ret == -EEXIST)
12133                         ret = 0;
12134                 if (ret < 0)
12135                         goto out;
12136 next:
12137                 /*
12138                  * TODO: if next leaf is corrupted, jump to nearest next valid
12139                  * leaf.
12140                  */
12141                 ret = btrfs_next_item(cur_root, &path);
12142                 if (ret < 0)
12143                         goto out;
12144                 if (ret > 0) {
12145                         ret = 0;
12146                         goto out;
12147                 }
12148         }
12149
12150 out:
12151         btrfs_release_path(&path);
12152         free(buf);
12153         return ret;
12154 }
12155
12156 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12157                                   struct btrfs_root *csum_root)
12158 {
12159         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12160         struct btrfs_path path;
12161         struct btrfs_root *tree_root = fs_info->tree_root;
12162         struct btrfs_root *cur_root;
12163         struct extent_buffer *node;
12164         struct btrfs_key key;
12165         int slot = 0;
12166         int ret = 0;
12167
12168         btrfs_init_path(&path);
12169         key.objectid = BTRFS_FS_TREE_OBJECTID;
12170         key.offset = 0;
12171         key.type = BTRFS_ROOT_ITEM_KEY;
12172         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12173         if (ret < 0)
12174                 goto out;
12175         if (ret > 0) {
12176                 ret = -ENOENT;
12177                 goto out;
12178         }
12179
12180         while (1) {
12181                 node = path.nodes[0];
12182                 slot = path.slots[0];
12183                 btrfs_item_key_to_cpu(node, &key, slot);
12184                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12185                         goto out;
12186                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12187                         goto next;
12188                 if (!is_fstree(key.objectid))
12189                         goto next;
12190                 key.offset = (u64)-1;
12191
12192                 cur_root = btrfs_read_fs_root(fs_info, &key);
12193                 if (IS_ERR(cur_root) || !cur_root) {
12194                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12195                                 key.objectid);
12196                         goto out;
12197                 }
12198                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12199                                 cur_root);
12200                 if (ret < 0)
12201                         goto out;
12202 next:
12203                 ret = btrfs_next_item(tree_root, &path);
12204                 if (ret > 0) {
12205                         ret = 0;
12206                         goto out;
12207                 }
12208                 if (ret < 0)
12209                         goto out;
12210         }
12211
12212 out:
12213         btrfs_release_path(&path);
12214         return ret;
12215 }
12216
12217 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12218                                       struct btrfs_root *csum_root)
12219 {
12220         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12221         struct btrfs_path path;
12222         struct btrfs_extent_item *ei;
12223         struct extent_buffer *leaf;
12224         char *buf;
12225         struct btrfs_key key;
12226         int ret;
12227
12228         btrfs_init_path(&path);
12229         key.objectid = 0;
12230         key.type = BTRFS_EXTENT_ITEM_KEY;
12231         key.offset = 0;
12232         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12233         if (ret < 0) {
12234                 btrfs_release_path(&path);
12235                 return ret;
12236         }
12237
12238         buf = malloc(csum_root->fs_info->sectorsize);
12239         if (!buf) {
12240                 btrfs_release_path(&path);
12241                 return -ENOMEM;
12242         }
12243
12244         while (1) {
12245                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12246                         ret = btrfs_next_leaf(extent_root, &path);
12247                         if (ret < 0)
12248                                 break;
12249                         if (ret) {
12250                                 ret = 0;
12251                                 break;
12252                         }
12253                 }
12254                 leaf = path.nodes[0];
12255
12256                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12257                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12258                         path.slots[0]++;
12259                         continue;
12260                 }
12261
12262                 ei = btrfs_item_ptr(leaf, path.slots[0],
12263                                     struct btrfs_extent_item);
12264                 if (!(btrfs_extent_flags(leaf, ei) &
12265                       BTRFS_EXTENT_FLAG_DATA)) {
12266                         path.slots[0]++;
12267                         continue;
12268                 }
12269
12270                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12271                                     key.offset);
12272                 if (ret)
12273                         break;
12274                 path.slots[0]++;
12275         }
12276
12277         btrfs_release_path(&path);
12278         free(buf);
12279         return ret;
12280 }
12281
/*
 * Recalculate data checksums and insert them into the csum tree.
 *
 * An extent tree re-init wipes all extent info, so in that case the extent
 * tree cannot be used as the source.  When @search_fs_tree is non-zero the
 * fs/subvolume trees are walked instead; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12298
12299 static void free_roots_info_cache(void)
12300 {
12301         if (!roots_info_cache)
12302                 return;
12303
12304         while (!cache_tree_empty(roots_info_cache)) {
12305                 struct cache_extent *entry;
12306                 struct root_item_info *rii;
12307
12308                 entry = first_cache_extent(roots_info_cache);
12309                 if (!entry)
12310                         break;
12311                 remove_cache_extent(roots_info_cache, entry);
12312                 rii = container_of(entry, struct root_item_info, cache_extent);
12313                 free(rii);
12314         }
12315
12316         free(roots_info_cache);
12317         roots_info_cache = NULL;
12318 }
12319
12320 static int build_roots_info_cache(struct btrfs_fs_info *info)
12321 {
12322         int ret = 0;
12323         struct btrfs_key key;
12324         struct extent_buffer *leaf;
12325         struct btrfs_path path;
12326
12327         if (!roots_info_cache) {
12328                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12329                 if (!roots_info_cache)
12330                         return -ENOMEM;
12331                 cache_tree_init(roots_info_cache);
12332         }
12333
12334         btrfs_init_path(&path);
12335         key.objectid = 0;
12336         key.type = BTRFS_EXTENT_ITEM_KEY;
12337         key.offset = 0;
12338         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12339         if (ret < 0)
12340                 goto out;
12341         leaf = path.nodes[0];
12342
12343         while (1) {
12344                 struct btrfs_key found_key;
12345                 struct btrfs_extent_item *ei;
12346                 struct btrfs_extent_inline_ref *iref;
12347                 int slot = path.slots[0];
12348                 int type;
12349                 u64 flags;
12350                 u64 root_id;
12351                 u8 level;
12352                 struct cache_extent *entry;
12353                 struct root_item_info *rii;
12354
12355                 if (slot >= btrfs_header_nritems(leaf)) {
12356                         ret = btrfs_next_leaf(info->extent_root, &path);
12357                         if (ret < 0) {
12358                                 break;
12359                         } else if (ret) {
12360                                 ret = 0;
12361                                 break;
12362                         }
12363                         leaf = path.nodes[0];
12364                         slot = path.slots[0];
12365                 }
12366
12367                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12368
12369                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12370                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12371                         goto next;
12372
12373                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12374                 flags = btrfs_extent_flags(leaf, ei);
12375
12376                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12377                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12378                         goto next;
12379
12380                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12381                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12382                         level = found_key.offset;
12383                 } else {
12384                         struct btrfs_tree_block_info *binfo;
12385
12386                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12387                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12388                         level = btrfs_tree_block_level(leaf, binfo);
12389                 }
12390
12391                 /*
12392                  * For a root extent, it must be of the following type and the
12393                  * first (and only one) iref in the item.
12394                  */
12395                 type = btrfs_extent_inline_ref_type(leaf, iref);
12396                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12397                         goto next;
12398
12399                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12400                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12401                 if (!entry) {
12402                         rii = malloc(sizeof(struct root_item_info));
12403                         if (!rii) {
12404                                 ret = -ENOMEM;
12405                                 goto out;
12406                         }
12407                         rii->cache_extent.start = root_id;
12408                         rii->cache_extent.size = 1;
12409                         rii->level = (u8)-1;
12410                         entry = &rii->cache_extent;
12411                         ret = insert_cache_extent(roots_info_cache, entry);
12412                         ASSERT(ret == 0);
12413                 } else {
12414                         rii = container_of(entry, struct root_item_info,
12415                                            cache_extent);
12416                 }
12417
12418                 ASSERT(rii->cache_extent.start == root_id);
12419                 ASSERT(rii->cache_extent.size == 1);
12420
12421                 if (level > rii->level || rii->level == (u8)-1) {
12422                         rii->level = level;
12423                         rii->bytenr = found_key.objectid;
12424                         rii->gen = btrfs_extent_generation(leaf, ei);
12425                         rii->node_count = 1;
12426                 } else if (level == rii->level) {
12427                         rii->node_count++;
12428                 }
12429 next:
12430                 path.slots[0]++;
12431         }
12432
12433 out:
12434         btrfs_release_path(&path);
12435
12436         return ret;
12437 }
12438
12439 static int maybe_repair_root_item(struct btrfs_path *path,
12440                                   const struct btrfs_key *root_key,
12441                                   const int read_only_mode)
12442 {
12443         const u64 root_id = root_key->objectid;
12444         struct cache_extent *entry;
12445         struct root_item_info *rii;
12446         struct btrfs_root_item ri;
12447         unsigned long offset;
12448
12449         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12450         if (!entry) {
12451                 fprintf(stderr,
12452                         "Error: could not find extent items for root %llu\n",
12453                         root_key->objectid);
12454                 return -ENOENT;
12455         }
12456
12457         rii = container_of(entry, struct root_item_info, cache_extent);
12458         ASSERT(rii->cache_extent.start == root_id);
12459         ASSERT(rii->cache_extent.size == 1);
12460
12461         if (rii->node_count != 1) {
12462                 fprintf(stderr,
12463                         "Error: could not find btree root extent for root %llu\n",
12464                         root_id);
12465                 return -ENOENT;
12466         }
12467
12468         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12469         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12470
12471         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12472             btrfs_root_level(&ri) != rii->level ||
12473             btrfs_root_generation(&ri) != rii->gen) {
12474
12475                 /*
12476                  * If we're in repair mode but our caller told us to not update
12477                  * the root item, i.e. just check if it needs to be updated, don't
12478                  * print this message, since the caller will call us again shortly
12479                  * for the same root item without read only mode (the caller will
12480                  * open a transaction first).
12481                  */
12482                 if (!(read_only_mode && repair))
12483                         fprintf(stderr,
12484                                 "%sroot item for root %llu,"
12485                                 " current bytenr %llu, current gen %llu, current level %u,"
12486                                 " new bytenr %llu, new gen %llu, new level %u\n",
12487                                 (read_only_mode ? "" : "fixing "),
12488                                 root_id,
12489                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12490                                 btrfs_root_level(&ri),
12491                                 rii->bytenr, rii->gen, rii->level);
12492
12493                 if (btrfs_root_generation(&ri) > rii->gen) {
12494                         fprintf(stderr,
12495                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12496                                 root_id, btrfs_root_generation(&ri), rii->gen);
12497                         return -EINVAL;
12498                 }
12499
12500                 if (!read_only_mode) {
12501                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12502                         btrfs_set_root_level(&ri, rii->level);
12503                         btrfs_set_root_generation(&ri, rii->gen);
12504                         write_extent_buffer(path->nodes[0], &ri,
12505                                             offset, sizeof(ri));
12506                 }
12507
12508                 return 1;
12509         }
12510
12511         return 0;
12512 }
12513
12514 /*
12515  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12516  * caused read-only snapshots to be corrupted if they were created at a moment
12517  * when the source subvolume/snapshot had orphan items. The issue was that the
12518  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12519  * node instead of the post orphan cleanup root node.
12520  * So this function, and its callees, just detects and fixes those cases. Even
12521  * though the regression was for read-only snapshots, this function applies to
12522  * any snapshot/subvolume root.
12523  * This must be run before any other repair code - not doing it so, makes other
12524  * repair code delete or modify backrefs in the extent tree for example, which
12525  * will result in an inconsistent fs after repairing the root items.
12526  */
12527 static int repair_root_items(struct btrfs_fs_info *info)
12528 {
12529         struct btrfs_path path;
12530         struct btrfs_key key;
12531         struct extent_buffer *leaf;
12532         struct btrfs_trans_handle *trans = NULL;
12533         int ret = 0;
12534         int bad_roots = 0;
12535         int need_trans = 0;
12536
12537         btrfs_init_path(&path);
12538
12539         ret = build_roots_info_cache(info);
12540         if (ret)
12541                 goto out;
12542
12543         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12544         key.type = BTRFS_ROOT_ITEM_KEY;
12545         key.offset = 0;
12546
12547 again:
12548         /*
12549          * Avoid opening and committing transactions if a leaf doesn't have
12550          * any root items that need to be fixed, so that we avoid rotating
12551          * backup roots unnecessarily.
12552          */
12553         if (need_trans) {
12554                 trans = btrfs_start_transaction(info->tree_root, 1);
12555                 if (IS_ERR(trans)) {
12556                         ret = PTR_ERR(trans);
12557                         goto out;
12558                 }
12559         }
12560
12561         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12562                                 0, trans ? 1 : 0);
12563         if (ret < 0)
12564                 goto out;
12565         leaf = path.nodes[0];
12566
12567         while (1) {
12568                 struct btrfs_key found_key;
12569
12570                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12571                         int no_more_keys = find_next_key(&path, &key);
12572
12573                         btrfs_release_path(&path);
12574                         if (trans) {
12575                                 ret = btrfs_commit_transaction(trans,
12576                                                                info->tree_root);
12577                                 trans = NULL;
12578                                 if (ret < 0)
12579                                         goto out;
12580                         }
12581                         need_trans = 0;
12582                         if (no_more_keys)
12583                                 break;
12584                         goto again;
12585                 }
12586
12587                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12588
12589                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12590                         goto next;
12591                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12592                         goto next;
12593
12594                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12595                 if (ret < 0)
12596                         goto out;
12597                 if (ret) {
12598                         if (!trans && repair) {
12599                                 need_trans = 1;
12600                                 key = found_key;
12601                                 btrfs_release_path(&path);
12602                                 goto again;
12603                         }
12604                         bad_roots++;
12605                 }
12606 next:
12607                 path.slots[0]++;
12608         }
12609         ret = 0;
12610 out:
12611         free_roots_info_cache();
12612         btrfs_release_path(&path);
12613         if (trans)
12614                 btrfs_commit_transaction(trans, info->tree_root);
12615         if (ret < 0)
12616                 return ret;
12617
12618         return bad_roots;
12619 }
12620
12621 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12622 {
12623         struct btrfs_trans_handle *trans;
12624         struct btrfs_block_group_cache *bg_cache;
12625         u64 current = 0;
12626         int ret = 0;
12627
12628         /* Clear all free space cache inodes and its extent data */
12629         while (1) {
12630                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12631                 if (!bg_cache)
12632                         break;
12633                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12634                 if (ret < 0)
12635                         return ret;
12636                 current = bg_cache->key.objectid + bg_cache->key.offset;
12637         }
12638
12639         /* Don't forget to set cache_generation to -1 */
12640         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12641         if (IS_ERR(trans)) {
12642                 error("failed to update super block cache generation");
12643                 return PTR_ERR(trans);
12644         }
12645         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12646         btrfs_commit_transaction(trans, fs_info->tree_root);
12647
12648         return ret;
12649 }
12650
12651 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12652                 int clear_version)
12653 {
12654         int ret = 0;
12655
12656         if (clear_version == 1) {
12657                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12658                         error(
12659                 "free space cache v2 detected, use --clear-space-cache v2");
12660                         ret = 1;
12661                         goto close_out;
12662                 }
12663                 printf("Clearing free space cache\n");
12664                 ret = clear_free_space_cache(fs_info);
12665                 if (ret) {
12666                         error("failed to clear free space cache");
12667                         ret = 1;
12668                 } else {
12669                         printf("Free space cache cleared\n");
12670                 }
12671         } else if (clear_version == 2) {
12672                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12673                         printf("no free space cache v2 to clear\n");
12674                         ret = 0;
12675                         goto close_out;
12676                 }
12677                 printf("Clear free space cache v2\n");
12678                 ret = btrfs_clear_free_space_tree(fs_info);
12679                 if (ret) {
12680                         error("failed to clear free space cache v2: %d", ret);
12681                         ret = 1;
12682                 } else {
12683                         printf("free space cache v2 cleared\n");
12684                 }
12685         }
12686 close_out:
12687         return ret;
12688 }
12689
/*
 * Help text for the check command, as a NULL-terminated array of lines: the
 * synopsis, a one-line summary, a longer description, an empty separator
 * line and then the option descriptions.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12720
12721 int cmd_check(int argc, char **argv)
12722 {
12723         struct cache_tree root_cache;
12724         struct btrfs_root *root;
12725         struct btrfs_fs_info *info;
12726         u64 bytenr = 0;
12727         u64 subvolid = 0;
12728         u64 tree_root_bytenr = 0;
12729         u64 chunk_root_bytenr = 0;
12730         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12731         int ret;
12732         int err = 0;
12733         u64 num;
12734         int init_csum_tree = 0;
12735         int readonly = 0;
12736         int clear_space_cache = 0;
12737         int qgroup_report = 0;
12738         int qgroups_repaired = 0;
12739         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12740
12741         while(1) {
12742                 int c;
12743                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12744                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12745                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12746                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12747                 static const struct option long_options[] = {
12748                         { "super", required_argument, NULL, 's' },
12749                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12750                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12751                         { "init-csum-tree", no_argument, NULL,
12752                                 GETOPT_VAL_INIT_CSUM },
12753                         { "init-extent-tree", no_argument, NULL,
12754                                 GETOPT_VAL_INIT_EXTENT },
12755                         { "check-data-csum", no_argument, NULL,
12756                                 GETOPT_VAL_CHECK_CSUM },
12757                         { "backup", no_argument, NULL, 'b' },
12758                         { "subvol-extents", required_argument, NULL, 'E' },
12759                         { "qgroup-report", no_argument, NULL, 'Q' },
12760                         { "tree-root", required_argument, NULL, 'r' },
12761                         { "chunk-root", required_argument, NULL,
12762                                 GETOPT_VAL_CHUNK_TREE },
12763                         { "progress", no_argument, NULL, 'p' },
12764                         { "mode", required_argument, NULL,
12765                                 GETOPT_VAL_MODE },
12766                         { "clear-space-cache", required_argument, NULL,
12767                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12768                         { NULL, 0, NULL, 0}
12769                 };
12770
12771                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12772                 if (c < 0)
12773                         break;
12774                 switch(c) {
12775                         case 'a': /* ignored */ break;
12776                         case 'b':
12777                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12778                                 break;
12779                         case 's':
12780                                 num = arg_strtou64(optarg);
12781                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12782                                         error(
12783                                         "super mirror should be less than %d",
12784                                                 BTRFS_SUPER_MIRROR_MAX);
12785                                         exit(1);
12786                                 }
12787                                 bytenr = btrfs_sb_offset(((int)num));
12788                                 printf("using SB copy %llu, bytenr %llu\n", num,
12789                                        (unsigned long long)bytenr);
12790                                 break;
12791                         case 'Q':
12792                                 qgroup_report = 1;
12793                                 break;
12794                         case 'E':
12795                                 subvolid = arg_strtou64(optarg);
12796                                 break;
12797                         case 'r':
12798                                 tree_root_bytenr = arg_strtou64(optarg);
12799                                 break;
12800                         case GETOPT_VAL_CHUNK_TREE:
12801                                 chunk_root_bytenr = arg_strtou64(optarg);
12802                                 break;
12803                         case 'p':
12804                                 ctx.progress_enabled = true;
12805                                 break;
12806                         case '?':
12807                         case 'h':
12808                                 usage(cmd_check_usage);
12809                         case GETOPT_VAL_REPAIR:
12810                                 printf("enabling repair mode\n");
12811                                 repair = 1;
12812                                 ctree_flags |= OPEN_CTREE_WRITES;
12813                                 break;
12814                         case GETOPT_VAL_READONLY:
12815                                 readonly = 1;
12816                                 break;
12817                         case GETOPT_VAL_INIT_CSUM:
12818                                 printf("Creating a new CRC tree\n");
12819                                 init_csum_tree = 1;
12820                                 repair = 1;
12821                                 ctree_flags |= OPEN_CTREE_WRITES;
12822                                 break;
12823                         case GETOPT_VAL_INIT_EXTENT:
12824                                 init_extent_tree = 1;
12825                                 ctree_flags |= (OPEN_CTREE_WRITES |
12826                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12827                                 repair = 1;
12828                                 break;
12829                         case GETOPT_VAL_CHECK_CSUM:
12830                                 check_data_csum = 1;
12831                                 break;
12832                         case GETOPT_VAL_MODE:
12833                                 check_mode = parse_check_mode(optarg);
12834                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12835                                         error("unknown mode: %s", optarg);
12836                                         exit(1);
12837                                 }
12838                                 break;
12839                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12840                                 if (strcmp(optarg, "v1") == 0) {
12841                                         clear_space_cache = 1;
12842                                 } else if (strcmp(optarg, "v2") == 0) {
12843                                         clear_space_cache = 2;
12844                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12845                                 } else {
12846                                         error(
12847                 "invalid argument to --clear-space-cache, must be v1 or v2");
12848                                         exit(1);
12849                                 }
12850                                 ctree_flags |= OPEN_CTREE_WRITES;
12851                                 break;
12852                 }
12853         }
12854
12855         if (check_argc_exact(argc - optind, 1))
12856                 usage(cmd_check_usage);
12857
12858         if (ctx.progress_enabled) {
12859                 ctx.tp = TASK_NOTHING;
12860                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12861         }
12862
12863         /* This check is the only reason for --readonly to exist */
12864         if (readonly && repair) {
12865                 error("repair options are not compatible with --readonly");
12866                 exit(1);
12867         }
12868
12869         /*
12870          * Not supported yet
12871          */
12872         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12873                 error("low memory mode doesn't support repair yet");
12874                 exit(1);
12875         }
12876
12877         radix_tree_init();
12878         cache_tree_init(&root_cache);
12879
12880         if((ret = check_mounted(argv[optind])) < 0) {
12881                 error("could not check mount status: %s", strerror(-ret));
12882                 err |= !!ret;
12883                 goto err_out;
12884         } else if(ret) {
12885                 error("%s is currently mounted, aborting", argv[optind]);
12886                 ret = -EBUSY;
12887                 err |= !!ret;
12888                 goto err_out;
12889         }
12890
12891         /* only allow partial opening under repair mode */
12892         if (repair)
12893                 ctree_flags |= OPEN_CTREE_PARTIAL;
12894
12895         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12896                                   chunk_root_bytenr, ctree_flags);
12897         if (!info) {
12898                 error("cannot open file system");
12899                 ret = -EIO;
12900                 err |= !!ret;
12901                 goto err_out;
12902         }
12903
12904         global_info = info;
12905         root = info->fs_root;
12906
12907         if (clear_space_cache) {
12908                 ret = do_clear_free_space_cache(info, clear_space_cache);
12909                 err |= !!ret;
12910                 goto close_out;
12911         }
12912
12913         /*
12914          * repair mode will force us to commit transaction which
12915          * will make us fail to load log tree when mounting.
12916          */
12917         if (repair && btrfs_super_log_root(info->super_copy)) {
12918                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12919                 if (!ret) {
12920                         ret = 1;
12921                         err |= !!ret;
12922                         goto close_out;
12923                 }
12924                 ret = zero_log_tree(root);
12925                 err |= !!ret;
12926                 if (ret) {
12927                         error("failed to zero log tree: %d", ret);
12928                         goto close_out;
12929                 }
12930         }
12931
12932         uuid_unparse(info->super_copy->fsid, uuidbuf);
12933         if (qgroup_report) {
12934                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12935                        uuidbuf);
12936                 ret = qgroup_verify_all(info);
12937                 err |= !!ret;
12938                 if (ret == 0)
12939                         report_qgroups(1);
12940                 goto close_out;
12941         }
12942         if (subvolid) {
12943                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12944                        subvolid, argv[optind], uuidbuf);
12945                 ret = print_extent_state(info, subvolid);
12946                 err |= !!ret;
12947                 goto close_out;
12948         }
12949         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12950
12951         if (!extent_buffer_uptodate(info->tree_root->node) ||
12952             !extent_buffer_uptodate(info->dev_root->node) ||
12953             !extent_buffer_uptodate(info->chunk_root->node)) {
12954                 error("critical roots corrupted, unable to check the filesystem");
12955                 err |= !!ret;
12956                 ret = -EIO;
12957                 goto close_out;
12958         }
12959
12960         if (init_extent_tree || init_csum_tree) {
12961                 struct btrfs_trans_handle *trans;
12962
12963                 trans = btrfs_start_transaction(info->extent_root, 0);
12964                 if (IS_ERR(trans)) {
12965                         error("error starting transaction");
12966                         ret = PTR_ERR(trans);
12967                         err |= !!ret;
12968                         goto close_out;
12969                 }
12970
12971                 if (init_extent_tree) {
12972                         printf("Creating a new extent tree\n");
12973                         ret = reinit_extent_tree(trans, info);
12974                         err |= !!ret;
12975                         if (ret)
12976                                 goto close_out;
12977                 }
12978
12979                 if (init_csum_tree) {
12980                         printf("Reinitialize checksum tree\n");
12981                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12982                         if (ret) {
12983                                 error("checksum tree initialization failed: %d",
12984                                                 ret);
12985                                 ret = -EIO;
12986                                 err |= !!ret;
12987                                 goto close_out;
12988                         }
12989
12990                         ret = fill_csum_tree(trans, info->csum_root,
12991                                              init_extent_tree);
12992                         err |= !!ret;
12993                         if (ret) {
12994                                 error("checksum tree refilling failed: %d", ret);
12995                                 return -EIO;
12996                         }
12997                 }
12998                 /*
12999                  * Ok now we commit and run the normal fsck, which will add
13000                  * extent entries for all of the items it finds.
13001                  */
13002                 ret = btrfs_commit_transaction(trans, info->extent_root);
13003                 err |= !!ret;
13004                 if (ret)
13005                         goto close_out;
13006         }
13007         if (!extent_buffer_uptodate(info->extent_root->node)) {
13008                 error("critical: extent_root, unable to check the filesystem");
13009                 ret = -EIO;
13010                 err |= !!ret;
13011                 goto close_out;
13012         }
13013         if (!extent_buffer_uptodate(info->csum_root->node)) {
13014                 error("critical: csum_root, unable to check the filesystem");
13015                 ret = -EIO;
13016                 err |= !!ret;
13017                 goto close_out;
13018         }
13019
13020         if (!ctx.progress_enabled)
13021                 fprintf(stderr, "checking extents\n");
13022         if (check_mode == CHECK_MODE_LOWMEM)
13023                 ret = check_chunks_and_extents_v2(root);
13024         else
13025                 ret = check_chunks_and_extents(root);
13026         err |= !!ret;
13027         if (ret)
13028                 error(
13029                 "errors found in extent allocation tree or chunk allocation");
13030
13031         ret = repair_root_items(info);
13032         err |= !!ret;
13033         if (ret < 0) {
13034                 error("failed to repair root items: %s", strerror(-ret));
13035                 goto close_out;
13036         }
13037         if (repair) {
13038                 fprintf(stderr, "Fixed %d roots.\n", ret);
13039                 ret = 0;
13040         } else if (ret > 0) {
13041                 fprintf(stderr,
13042                        "Found %d roots with an outdated root item.\n",
13043                        ret);
13044                 fprintf(stderr,
13045                         "Please run a filesystem check with the option --repair to fix them.\n");
13046                 ret = 1;
13047                 err |= !!ret;
13048                 goto close_out;
13049         }
13050
13051         if (!ctx.progress_enabled) {
13052                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13053                         fprintf(stderr, "checking free space tree\n");
13054                 else
13055                         fprintf(stderr, "checking free space cache\n");
13056         }
13057         ret = check_space_cache(root);
13058         err |= !!ret;
13059         if (ret) {
13060                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13061                         error("errors found in free space tree");
13062                 else
13063                         error("errors found in free space cache");
13064                 goto out;
13065         }
13066
13067         /*
13068          * We used to have to have these hole extents in between our real
13069          * extents so if we don't have this flag set we need to make sure there
13070          * are no gaps in the file extents for inodes, otherwise we can just
13071          * ignore it when this happens.
13072          */
13073         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13074         if (!ctx.progress_enabled)
13075                 fprintf(stderr, "checking fs roots\n");
13076         if (check_mode == CHECK_MODE_LOWMEM)
13077                 ret = check_fs_roots_v2(root->fs_info);
13078         else
13079                 ret = check_fs_roots(root, &root_cache);
13080         err |= !!ret;
13081         if (ret) {
13082                 error("errors found in fs roots");
13083                 goto out;
13084         }
13085
13086         fprintf(stderr, "checking csums\n");
13087         ret = check_csums(root);
13088         err |= !!ret;
13089         if (ret) {
13090                 error("errors found in csum tree");
13091                 goto out;
13092         }
13093
13094         fprintf(stderr, "checking root refs\n");
13095         /* For low memory mode, check_fs_roots_v2 handles root refs */
13096         if (check_mode != CHECK_MODE_LOWMEM) {
13097                 ret = check_root_refs(root, &root_cache);
13098                 err |= !!ret;
13099                 if (ret) {
13100                         error("errors found in root refs");
13101                         goto out;
13102                 }
13103         }
13104
13105         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13106                 struct extent_buffer *eb;
13107
13108                 eb = list_first_entry(&root->fs_info->recow_ebs,
13109                                       struct extent_buffer, recow);
13110                 list_del_init(&eb->recow);
13111                 ret = recow_extent_buffer(root, eb);
13112                 err |= !!ret;
13113                 if (ret) {
13114                         error("fails to fix transid errors");
13115                         break;
13116                 }
13117         }
13118
13119         while (!list_empty(&delete_items)) {
13120                 struct bad_item *bad;
13121
13122                 bad = list_first_entry(&delete_items, struct bad_item, list);
13123                 list_del_init(&bad->list);
13124                 if (repair) {
13125                         ret = delete_bad_item(root, bad);
13126                         err |= !!ret;
13127                 }
13128                 free(bad);
13129         }
13130
13131         if (info->quota_enabled) {
13132                 fprintf(stderr, "checking quota groups\n");
13133                 ret = qgroup_verify_all(info);
13134                 err |= !!ret;
13135                 if (ret) {
13136                         error("failed to check quota groups");
13137                         goto out;
13138                 }
13139                 report_qgroups(0);
13140                 ret = repair_qgroups(info, &qgroups_repaired);
13141                 err |= !!ret;
13142                 if (err) {
13143                         error("failed to repair quota groups");
13144                         goto out;
13145                 }
13146                 ret = 0;
13147         }
13148
13149         if (!list_empty(&root->fs_info->recow_ebs)) {
13150                 error("transid errors in file system");
13151                 ret = 1;
13152                 err |= !!ret;
13153         }
13154 out:
13155         printf("found %llu bytes used, ",
13156                (unsigned long long)bytes_used);
13157         if (err)
13158                 printf("error(s) found\n");
13159         else
13160                 printf("no error found\n");
13161         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13162         printf("total tree bytes: %llu\n",
13163                (unsigned long long)total_btree_bytes);
13164         printf("total fs tree bytes: %llu\n",
13165                (unsigned long long)total_fs_tree_bytes);
13166         printf("total extent tree bytes: %llu\n",
13167                (unsigned long long)total_extent_tree_bytes);
13168         printf("btree space waste bytes: %llu\n",
13169                (unsigned long long)btree_space_waste);
13170         printf("file data blocks allocated: %llu\n referenced %llu\n",
13171                 (unsigned long long)data_bytes_allocated,
13172                 (unsigned long long)data_bytes_referenced);
13173
13174         free_qgroup_counts();
13175         free_root_recs_tree(&root_cache);
13176 close_out:
13177         close_ctree(root);
13178 err_out:
13179         if (ctx.progress_enabled)
13180                 task_deinit(ctx.info);
13181
13182         return err;
13183 }