btrfs-progs: docs: add missing short option for qroup-report
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases shown by the progress indicator.
 *
 * The enumerator value is used directly as an index into
 * task_position_string[] in print_status_check(), so the order of the
 * enumerators must match the order of the strings there.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /* must stay last: it has no entry in task_position_string[] */
        TASK_NOTHING,
};
53
/*
 * Context passed (as void *) to the progress callbacks, see
 * print_status_check().
 */
struct task_ctx {
        /* NOTE(review): presumably set when progress output was requested */
        int progress_enabled;
        /* phase whose name is currently printed by print_status_check() */
        enum task_position tp;

        /* handle given to task_period_start()/task_period_wait() */
        struct task_info *info;
};
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementations selectable by name, see parse_check_mode():
 * "lowmem" maps to CHECK_MODE_LOWMEM, "orig" and "original" map to
 * CHECK_MODE_ORIGINAL, anything else yields CHECK_MODE_UNKNOWN.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        /* returned by parse_check_mode() for an unrecognized name */
        CHECK_MODE_UNKNOWN,
        /* alias, not a separate mode */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};
85
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common header of a back reference.  It is embedded as the member 'node'
 * in both struct data_backref and struct tree_backref; to_data_backref()
 * and to_tree_backref() convert a pointer to this header back to the
 * containing structure.
 */
struct extent_backref {
        /* list linkage, see to_extent_backref() */
        struct list_head list;
        /* NOTE(review): presumably tells data_backref from tree_backref */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/*
 * Error bits.  Every macro is a single distinct bit, so several errors can
 * be OR-ed together into one value.  Note that bit 0 is not assigned.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, but without the undetermined members and linked
 * through a plain list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_record::orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* printed as "inode" by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 does not fit in one bit, which is why that bitfield is two
 * bits wide.
 */
enum { FLAG_UNSET = 2 };

/* Bookkeeping for one extent; the members are filled in by code elsewhere. */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* linkage used by to_extent_record() */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can hold FLAG_UNSET */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/*
 * Back reference error bits.  Each one is a single distinct bit so they can
 * be combined in one mask; print_ref_error() turns such a mask into text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * One hole in a file, covering the byte range [start, start + len) (see
 * compare_hole_range()).  Holes are kept in an rb-tree ordered by 'start',
 * see compare_hole() and add_file_extent_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
252
/*
 * Everything collected about one inode.
 *
 * A record can be shared: 'refs' counts the holders.  get_inode_rec()
 * clones a shared record before handing it out for modification and
 * free_inode_rec() only releases the memory when the count drops to zero.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* mask of I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* get_inode_rec() presets this to 1 for BTRFS_FREE_INO_OBJECTID */
        u32 found_link;
        u64 found_size;
        /* get_inode_rec() initializes this to (u64)-1 */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* number of holders sharing this record */
        u32 refs;
};
280
/*
 * Inode error bits kept in inode_record::errors.  Each one is a single
 * distinct bit; print_inode_error() turns the mask into text.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
/*
 * Cache tree entry that carries an opaque pointer.  get_inode_rec() uses it
 * to map an inode number (cache.start = ino, cache.size = 1) to its
 * struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit so that a combined error mask can be
 * decoded unambiguously.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
/*
 * Used to share bit 4 with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable.  Moved to the first free bit so that no other value
 * has to change.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Progress callback counterpart of print_status_check(): emit a newline on
 * stdout and flush it.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize,
833                                          root->fs_info->sectorsize));
834         }
835 }
836
837 static void print_ref_error(int errors)
838 {
839         if (errors & REF_ERR_NO_DIR_ITEM)
840                 fprintf(stderr, ", no dir item");
841         if (errors & REF_ERR_NO_DIR_INDEX)
842                 fprintf(stderr, ", no dir index");
843         if (errors & REF_ERR_NO_INODE_REF)
844                 fprintf(stderr, ", no inode ref");
845         if (errors & REF_ERR_DUP_DIR_ITEM)
846                 fprintf(stderr, ", dup dir item");
847         if (errors & REF_ERR_DUP_DIR_INDEX)
848                 fprintf(stderr, ", dup dir index");
849         if (errors & REF_ERR_DUP_INODE_REF)
850                 fprintf(stderr, ", dup inode ref");
851         if (errors & REF_ERR_INDEX_UNMATCH)
852                 fprintf(stderr, ", index mismatch");
853         if (errors & REF_ERR_FILETYPE_UNMATCH)
854                 fprintf(stderr, ", filetype mismatch");
855         if (errors & REF_ERR_NAME_TOO_LONG)
856                 fprintf(stderr, ", name too long");
857         if (errors & REF_ERR_NO_ROOT_REF)
858                 fprintf(stderr, ", no root ref");
859         if (errors & REF_ERR_NO_ROOT_BACKREF)
860                 fprintf(stderr, ", no root backref");
861         if (errors & REF_ERR_DUP_ROOT_REF)
862                 fprintf(stderr, ", dup root ref");
863         if (errors & REF_ERR_DUP_ROOT_BACKREF)
864                 fprintf(stderr, ", dup root backref");
865         fprintf(stderr, "\n");
866 }
867
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869                                           u64 ino, int mod)
870 {
871         struct ptr_node *node;
872         struct cache_extent *cache;
873         struct inode_record *rec = NULL;
874         int ret;
875
876         cache = lookup_cache_extent(inode_cache, ino, 1);
877         if (cache) {
878                 node = container_of(cache, struct ptr_node, cache);
879                 rec = node->data;
880                 if (mod && rec->refs > 1) {
881                         node->data = clone_inode_rec(rec);
882                         if (IS_ERR(node->data))
883                                 return node->data;
884                         rec->refs--;
885                         rec = node->data;
886                 }
887         } else if (mod) {
888                 rec = calloc(1, sizeof(*rec));
889                 if (!rec)
890                         return ERR_PTR(-ENOMEM);
891                 rec->ino = ino;
892                 rec->extent_start = (u64)-1;
893                 rec->refs = 1;
894                 INIT_LIST_HEAD(&rec->backrefs);
895                 INIT_LIST_HEAD(&rec->orphan_extents);
896                 rec->holes = RB_ROOT;
897
898                 node = malloc(sizeof(*node));
899                 if (!node) {
900                         free(rec);
901                         return ERR_PTR(-ENOMEM);
902                 }
903                 node->cache.start = ino;
904                 node->cache.size = 1;
905                 node->data = rec;
906
907                 if (ino == BTRFS_FREE_INO_OBJECTID)
908                         rec->found_link = 1;
909
910                 ret = insert_cache_extent(inode_cache, &node->cache);
911                 if (ret)
912                         return ERR_PTR(-EEXIST);
913         }
914         return rec;
915 }
916
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 {
919         struct orphan_data_extent *orphan;
920
921         while (!list_empty(orphan_extents)) {
922                 orphan = list_entry(orphan_extents->next,
923                                     struct orphan_data_extent, list);
924                 list_del(&orphan->list);
925                 free(orphan);
926         }
927 }
928
929 static void free_inode_rec(struct inode_record *rec)
930 {
931         struct inode_backref *backref;
932
933         if (--rec->refs > 0)
934                 return;
935
936         while (!list_empty(&rec->backrefs)) {
937                 backref = to_inode_backref(rec->backrefs.next);
938                 list_del(&backref->list);
939                 free(backref);
940         }
941         free_orphan_data_extents(&rec->orphan_extents);
942         free_file_extent_holes(&rec->holes);
943         free(rec);
944 }
945
946 static int can_free_inode_rec(struct inode_record *rec)
947 {
948         if (!rec->errors && rec->checked && rec->found_inode_item &&
949             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
950                 return 1;
951         return 0;
952 }
953
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955                                  struct inode_record *rec)
956 {
957         struct cache_extent *cache;
958         struct inode_backref *tmp, *backref;
959         struct ptr_node *node;
960         u8 filetype;
961
962         if (!rec->found_inode_item)
963                 return;
964
965         filetype = imode_to_type(rec->imode);
966         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967                 if (backref->found_dir_item && backref->found_dir_index) {
968                         if (backref->filetype != filetype)
969                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970                         if (!backref->errors && backref->found_inode_ref &&
971                             rec->nlink == rec->found_link) {
972                                 list_del(&backref->list);
973                                 free(backref);
974                         }
975                 }
976         }
977
978         if (!rec->checked || rec->merging)
979                 return;
980
981         if (S_ISDIR(rec->imode)) {
982                 if (rec->found_size != rec->isize)
983                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984                 if (rec->found_file_extent)
985                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
986         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987                 if (rec->found_dir_item)
988                         rec->errors |= I_ERR_ODD_DIR_ITEM;
989                 if (rec->found_size != rec->nbytes)
990                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991                 if (rec->nlink > 0 && !no_holes &&
992                     (rec->extent_end < rec->isize ||
993                      first_extent_gap(&rec->holes) < rec->isize))
994                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995         }
996
997         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998                 if (rec->found_csum_item && rec->nodatasum)
999                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000                 if (rec->some_csum_missing && !rec->nodatasum)
1001                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002         }
1003
1004         BUG_ON(rec->refs != 1);
1005         if (can_free_inode_rec(rec)) {
1006                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007                 node = container_of(cache, struct ptr_node, cache);
1008                 BUG_ON(node->data != rec);
1009                 remove_cache_extent(inode_cache, &node->cache);
1010                 free(node);
1011                 free_inode_rec(rec);
1012         }
1013 }
1014
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 {
1017         struct btrfs_path path;
1018         struct btrfs_key key;
1019         int ret;
1020
1021         key.objectid = BTRFS_ORPHAN_OBJECTID;
1022         key.type = BTRFS_ORPHAN_ITEM_KEY;
1023         key.offset = ino;
1024
1025         btrfs_init_path(&path);
1026         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027         btrfs_release_path(&path);
1028         if (ret > 0)
1029                 ret = -ENOENT;
1030         return ret;
1031 }
1032
1033 static int process_inode_item(struct extent_buffer *eb,
1034                               int slot, struct btrfs_key *key,
1035                               struct shared_node *active_node)
1036 {
1037         struct inode_record *rec;
1038         struct btrfs_inode_item *item;
1039
1040         rec = active_node->current;
1041         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042         if (rec->found_inode_item) {
1043                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044                 return 1;
1045         }
1046         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047         rec->nlink = btrfs_inode_nlink(eb, item);
1048         rec->isize = btrfs_inode_size(eb, item);
1049         rec->nbytes = btrfs_inode_nbytes(eb, item);
1050         rec->imode = btrfs_inode_mode(eb, item);
1051         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052                 rec->nodatasum = 1;
1053         rec->found_inode_item = 1;
1054         if (rec->nlink == 0)
1055                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056         maybe_free_inode_rec(&active_node->inode_cache, rec);
1057         return 0;
1058 }
1059
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061                                                 const char *name,
1062                                                 int namelen, u64 dir)
1063 {
1064         struct inode_backref *backref;
1065
1066         list_for_each_entry(backref, &rec->backrefs, list) {
1067                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068                         break;
1069                 if (backref->dir != dir || backref->namelen != namelen)
1070                         continue;
1071                 if (memcmp(name, backref->name, namelen))
1072                         continue;
1073                 return backref;
1074         }
1075
1076         backref = malloc(sizeof(*backref) + namelen + 1);
1077         if (!backref)
1078                 return NULL;
1079         memset(backref, 0, sizeof(*backref));
1080         backref->dir = dir;
1081         backref->namelen = namelen;
1082         memcpy(backref->name, name, namelen);
1083         backref->name[namelen] = '\0';
1084         list_add_tail(&backref->list, &rec->backrefs);
1085         return backref;
1086 }
1087
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089                              u64 ino, u64 dir, u64 index,
1090                              const char *name, int namelen,
1091                              u8 filetype, u8 itemtype, int errors)
1092 {
1093         struct inode_record *rec;
1094         struct inode_backref *backref;
1095
1096         rec = get_inode_rec(inode_cache, ino, 1);
1097         BUG_ON(IS_ERR(rec));
1098         backref = get_inode_backref(rec, name, namelen, dir);
1099         BUG_ON(!backref);
1100         if (errors)
1101                 backref->errors |= errors;
1102         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103                 if (backref->found_dir_index)
1104                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105                 if (backref->found_inode_ref && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 if (backref->found_dir_item && backref->filetype != filetype)
1108                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109
1110                 backref->index = index;
1111                 backref->filetype = filetype;
1112                 backref->found_dir_index = 1;
1113         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114                 rec->found_link++;
1115                 if (backref->found_dir_item)
1116                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117                 if (backref->found_dir_index && backref->filetype != filetype)
1118                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119
1120                 backref->filetype = filetype;
1121                 backref->found_dir_item = 1;
1122         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124                 if (backref->found_inode_ref)
1125                         backref->errors |= REF_ERR_DUP_INODE_REF;
1126                 if (backref->found_dir_index && backref->index != index)
1127                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1128                 else
1129                         backref->index = index;
1130
1131                 backref->ref_type = itemtype;
1132                 backref->found_inode_ref = 1;
1133         } else {
1134                 BUG_ON(1);
1135         }
1136
1137         maybe_free_inode_rec(inode_cache, rec);
1138         return 0;
1139 }
1140
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142                             struct cache_tree *dst_cache)
1143 {
1144         struct inode_backref *backref;
1145         u32 dir_count = 0;
1146         int ret = 0;
1147
1148         dst->merging = 1;
1149         list_for_each_entry(backref, &src->backrefs, list) {
1150                 if (backref->found_dir_index) {
1151                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1152                                         backref->index, backref->name,
1153                                         backref->namelen, backref->filetype,
1154                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1155                 }
1156                 if (backref->found_dir_item) {
1157                         dir_count++;
1158                         add_inode_backref(dst_cache, dst->ino,
1159                                         backref->dir, 0, backref->name,
1160                                         backref->namelen, backref->filetype,
1161                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1162                 }
1163                 if (backref->found_inode_ref) {
1164                         add_inode_backref(dst_cache, dst->ino,
1165                                         backref->dir, backref->index,
1166                                         backref->name, backref->namelen, 0,
1167                                         backref->ref_type, backref->errors);
1168                 }
1169         }
1170
1171         if (src->found_dir_item)
1172                 dst->found_dir_item = 1;
1173         if (src->found_file_extent)
1174                 dst->found_file_extent = 1;
1175         if (src->found_csum_item)
1176                 dst->found_csum_item = 1;
1177         if (src->some_csum_missing)
1178                 dst->some_csum_missing = 1;
1179         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1181                 if (ret < 0)
1182                         return ret;
1183         }
1184
1185         BUG_ON(src->found_link < dir_count);
1186         dst->found_link += src->found_link - dir_count;
1187         dst->found_size += src->found_size;
1188         if (src->extent_start != (u64)-1) {
1189                 if (dst->extent_start == (u64)-1) {
1190                         dst->extent_start = src->extent_start;
1191                         dst->extent_end = src->extent_end;
1192                 } else {
1193                         if (dst->extent_end > src->extent_start)
1194                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195                         else if (dst->extent_end < src->extent_start) {
1196                                 ret = add_file_extent_hole(&dst->holes,
1197                                         dst->extent_end,
1198                                         src->extent_start - dst->extent_end);
1199                         }
1200                         if (dst->extent_end < src->extent_end)
1201                                 dst->extent_end = src->extent_end;
1202                 }
1203         }
1204
1205         dst->errors |= src->errors;
1206         if (src->found_inode_item) {
1207                 if (!dst->found_inode_item) {
1208                         dst->nlink = src->nlink;
1209                         dst->isize = src->isize;
1210                         dst->nbytes = src->nbytes;
1211                         dst->imode = src->imode;
1212                         dst->nodatasum = src->nodatasum;
1213                         dst->found_inode_item = 1;
1214                 } else {
1215                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1216                 }
1217         }
1218         dst->merging = 0;
1219
1220         return 0;
1221 }
1222
1223 static int splice_shared_node(struct shared_node *src_node,
1224                               struct shared_node *dst_node)
1225 {
1226         struct cache_extent *cache;
1227         struct ptr_node *node, *ins;
1228         struct cache_tree *src, *dst;
1229         struct inode_record *rec, *conflict;
1230         u64 current_ino = 0;
1231         int splice = 0;
1232         int ret;
1233
1234         if (--src_node->refs == 0)
1235                 splice = 1;
1236         if (src_node->current)
1237                 current_ino = src_node->current->ino;
1238
1239         src = &src_node->root_cache;
1240         dst = &dst_node->root_cache;
1241 again:
1242         cache = search_cache_extent(src, 0);
1243         while (cache) {
1244                 node = container_of(cache, struct ptr_node, cache);
1245                 rec = node->data;
1246                 cache = next_cache_extent(cache);
1247
1248                 if (splice) {
1249                         remove_cache_extent(src, &node->cache);
1250                         ins = node;
1251                 } else {
1252                         ins = malloc(sizeof(*ins));
1253                         BUG_ON(!ins);
1254                         ins->cache.start = node->cache.start;
1255                         ins->cache.size = node->cache.size;
1256                         ins->data = rec;
1257                         rec->refs++;
1258                 }
1259                 ret = insert_cache_extent(dst, &ins->cache);
1260                 if (ret == -EEXIST) {
1261                         conflict = get_inode_rec(dst, rec->ino, 1);
1262                         BUG_ON(IS_ERR(conflict));
1263                         merge_inode_recs(rec, conflict, dst);
1264                         if (rec->checked) {
1265                                 conflict->checked = 1;
1266                                 if (dst_node->current == conflict)
1267                                         dst_node->current = NULL;
1268                         }
1269                         maybe_free_inode_rec(dst, conflict);
1270                         free_inode_rec(rec);
1271                         free(ins);
1272                 } else {
1273                         BUG_ON(ret);
1274                 }
1275         }
1276
1277         if (src == &src_node->root_cache) {
1278                 src = &src_node->inode_cache;
1279                 dst = &dst_node->inode_cache;
1280                 goto again;
1281         }
1282
1283         if (current_ino > 0 && (!dst_node->current ||
1284             current_ino > dst_node->current->ino)) {
1285                 if (dst_node->current) {
1286                         dst_node->current->checked = 1;
1287                         maybe_free_inode_rec(dst, dst_node->current);
1288                 }
1289                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290                 BUG_ON(IS_ERR(dst_node->current));
1291         }
1292         return 0;
1293 }
1294
1295 static void free_inode_ptr(struct cache_extent *cache)
1296 {
1297         struct ptr_node *node;
1298         struct inode_record *rec;
1299
1300         node = container_of(cache, struct ptr_node, cache);
1301         rec = node->data;
1302         free_inode_rec(rec);
1303         free(node);
1304 }
1305
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309                                             u64 bytenr)
1310 {
1311         struct cache_extent *cache;
1312         struct shared_node *node;
1313
1314         cache = lookup_cache_extent(shared, bytenr, 1);
1315         if (cache) {
1316                 node = container_of(cache, struct shared_node, cache);
1317                 return node;
1318         }
1319         return NULL;
1320 }
1321
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 {
1324         int ret;
1325         struct shared_node *node;
1326
1327         node = calloc(1, sizeof(*node));
1328         if (!node)
1329                 return -ENOMEM;
1330         node->cache.start = bytenr;
1331         node->cache.size = 1;
1332         cache_tree_init(&node->root_cache);
1333         cache_tree_init(&node->inode_cache);
1334         node->refs = refs;
1335
1336         ret = insert_cache_extent(shared, &node->cache);
1337
1338         return ret;
1339 }
1340
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342                              struct walk_control *wc, int level)
1343 {
1344         struct shared_node *node;
1345         struct shared_node *dest;
1346         int ret;
1347
1348         if (level == wc->active_node)
1349                 return 0;
1350
1351         BUG_ON(wc->active_node <= level);
1352         node = find_shared_node(&wc->shared, bytenr);
1353         if (!node) {
1354                 ret = add_shared_node(&wc->shared, bytenr, refs);
1355                 BUG_ON(ret);
1356                 node = find_shared_node(&wc->shared, bytenr);
1357                 wc->nodes[level] = node;
1358                 wc->active_node = level;
1359                 return 0;
1360         }
1361
1362         if (wc->root_level == wc->active_node &&
1363             btrfs_root_refs(&root->root_item) == 0) {
1364                 if (--node->refs == 0) {
1365                         free_inode_recs_tree(&node->root_cache);
1366                         free_inode_recs_tree(&node->inode_cache);
1367                         remove_cache_extent(&wc->shared, &node->cache);
1368                         free(node);
1369                 }
1370                 return 1;
1371         }
1372
1373         dest = wc->nodes[wc->active_node];
1374         splice_shared_node(node, dest);
1375         if (node->refs == 0) {
1376                 remove_cache_extent(&wc->shared, &node->cache);
1377                 free(node);
1378         }
1379         return 1;
1380 }
1381
1382 static int leave_shared_node(struct btrfs_root *root,
1383                              struct walk_control *wc, int level)
1384 {
1385         struct shared_node *node;
1386         struct shared_node *dest;
1387         int i;
1388
1389         if (level == wc->root_level)
1390                 return 0;
1391
1392         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1393                 if (wc->nodes[i])
1394                         break;
1395         }
1396         BUG_ON(i >= BTRFS_MAX_LEVEL);
1397
1398         node = wc->nodes[wc->active_node];
1399         wc->nodes[wc->active_node] = NULL;
1400         wc->active_node = i;
1401
1402         dest = wc->nodes[wc->active_node];
1403         if (wc->active_node < wc->root_level ||
1404             btrfs_root_refs(&root->root_item) > 0) {
1405                 BUG_ON(node->refs <= 1);
1406                 splice_shared_node(node, dest);
1407         } else {
1408                 BUG_ON(node->refs < 2);
1409                 node->refs--;
1410         }
1411         return 0;
1412 }
1413
1414 /*
1415  * Returns:
1416  * < 0 - on error
1417  * 1   - if the root with id child_root_id is a child of root parent_root_id
1418  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1419  *       has other root(s) as parent(s)
1420  * 2   - if the root child_root_id doesn't have any parent roots
1421  */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423                          u64 child_root_id)
1424 {
1425         struct btrfs_path path;
1426         struct btrfs_key key;
1427         struct extent_buffer *leaf;
1428         int has_parent = 0;
1429         int ret;
1430
1431         btrfs_init_path(&path);
1432
1433         key.objectid = parent_root_id;
1434         key.type = BTRFS_ROOT_REF_KEY;
1435         key.offset = child_root_id;
1436         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1437                                 0, 0);
1438         if (ret < 0)
1439                 return ret;
1440         btrfs_release_path(&path);
1441         if (!ret)
1442                 return 1;
1443
1444         key.objectid = child_root_id;
1445         key.type = BTRFS_ROOT_BACKREF_KEY;
1446         key.offset = 0;
1447         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1448                                 0, 0);
1449         if (ret < 0)
1450                 goto out;
1451
1452         while (1) {
1453                 leaf = path.nodes[0];
1454                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456                         if (ret)
1457                                 break;
1458                         leaf = path.nodes[0];
1459                 }
1460
1461                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462                 if (key.objectid != child_root_id ||
1463                     key.type != BTRFS_ROOT_BACKREF_KEY)
1464                         break;
1465
1466                 has_parent = 1;
1467
1468                 if (key.offset == parent_root_id) {
1469                         btrfs_release_path(&path);
1470                         return 1;
1471                 }
1472
1473                 path.slots[0]++;
1474         }
1475 out:
1476         btrfs_release_path(&path);
1477         if (ret < 0)
1478                 return ret;
1479         return has_parent ? 0 : 2;
1480 }
1481
1482 static int process_dir_item(struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (cur + sizeof(*di) + name_len > total ||
1517                     name_len > BTRFS_NAME_LEN) {
1518                         error = REF_ERR_NAME_TOO_LONG;
1519
1520                         if (cur + sizeof(*di) > total)
1521                                 break;
1522                         len = min_t(u32, total - cur - sizeof(*di),
1523                                     BTRFS_NAME_LEN);
1524                 } else {
1525                         len = name_len;
1526                         error = 0;
1527                 }
1528
1529                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530
1531                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1532                     key->offset != btrfs_name_hash(namebuf, len)) {
1533                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1534                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1535                         key->objectid, key->offset, namebuf, len, filetype,
1536                         key->offset, btrfs_name_hash(namebuf, len));
1537                 }
1538
1539                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1540                         add_inode_backref(inode_cache, location.objectid,
1541                                           key->objectid, key->offset, namebuf,
1542                                           len, filetype, key->type, error);
1543                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1544                         add_inode_backref(root_cache, location.objectid,
1545                                           key->objectid, key->offset,
1546                                           namebuf, len, filetype,
1547                                           key->type, error);
1548                 } else {
1549                         fprintf(stderr, "invalid location in dir item %u\n",
1550                                 location.type);
1551                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1552                                           key->objectid, key->offset, namebuf,
1553                                           len, filetype, key->type, error);
1554                 }
1555
1556                 len = sizeof(*di) + name_len + data_len;
1557                 di = (struct btrfs_dir_item *)((char *)di + len);
1558                 cur += len;
1559         }
1560         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1561                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1562
1563         return 0;
1564 }
1565
1566 static int process_inode_ref(struct extent_buffer *eb,
1567                              int slot, struct btrfs_key *key,
1568                              struct shared_node *active_node)
1569 {
1570         u32 total;
1571         u32 cur = 0;
1572         u32 len;
1573         u32 name_len;
1574         u64 index;
1575         int error;
1576         struct cache_tree *inode_cache;
1577         struct btrfs_inode_ref *ref;
1578         char namebuf[BTRFS_NAME_LEN];
1579
1580         inode_cache = &active_node->inode_cache;
1581
1582         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1583         total = btrfs_item_size_nr(eb, slot);
1584         while (cur < total) {
1585                 name_len = btrfs_inode_ref_name_len(eb, ref);
1586                 index = btrfs_inode_ref_index(eb, ref);
1587
1588                 /* inode_ref + namelen should not cross item boundary */
1589                 if (cur + sizeof(*ref) + name_len > total ||
1590                     name_len > BTRFS_NAME_LEN) {
1591                         if (total < cur + sizeof(*ref))
1592                                 break;
1593
1594                         /* Still try to read out the remaining part */
1595                         len = min_t(u32, total - cur - sizeof(*ref),
1596                                     BTRFS_NAME_LEN);
1597                         error = REF_ERR_NAME_TOO_LONG;
1598                 } else {
1599                         len = name_len;
1600                         error = 0;
1601                 }
1602
1603                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, key->offset,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*ref) + name_len;
1608                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612 }
1613
1614 static int process_inode_extref(struct extent_buffer *eb,
1615                                 int slot, struct btrfs_key *key,
1616                                 struct shared_node *active_node)
1617 {
1618         u32 total;
1619         u32 cur = 0;
1620         u32 len;
1621         u32 name_len;
1622         u64 index;
1623         u64 parent;
1624         int error;
1625         struct cache_tree *inode_cache;
1626         struct btrfs_inode_extref *extref;
1627         char namebuf[BTRFS_NAME_LEN];
1628
1629         inode_cache = &active_node->inode_cache;
1630
1631         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1632         total = btrfs_item_size_nr(eb, slot);
1633         while (cur < total) {
1634                 name_len = btrfs_inode_extref_name_len(eb, extref);
1635                 index = btrfs_inode_extref_index(eb, extref);
1636                 parent = btrfs_inode_extref_parent(eb, extref);
1637                 if (name_len <= BTRFS_NAME_LEN) {
1638                         len = name_len;
1639                         error = 0;
1640                 } else {
1641                         len = BTRFS_NAME_LEN;
1642                         error = REF_ERR_NAME_TOO_LONG;
1643                 }
1644                 read_extent_buffer(eb, namebuf,
1645                                    (unsigned long)(extref + 1), len);
1646                 add_inode_backref(inode_cache, key->objectid, parent,
1647                                   index, namebuf, len, 0, key->type, error);
1648
1649                 len = sizeof(*extref) + name_len;
1650                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1651                 cur += len;
1652         }
1653         return 0;
1654
1655 }
1656
1657 static int count_csum_range(struct btrfs_root *root, u64 start,
1658                             u64 len, u64 *found)
1659 {
1660         struct btrfs_key key;
1661         struct btrfs_path path;
1662         struct extent_buffer *leaf;
1663         int ret;
1664         size_t size;
1665         *found = 0;
1666         u64 csum_end;
1667         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1668
1669         btrfs_init_path(&path);
1670
1671         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1672         key.offset = start;
1673         key.type = BTRFS_EXTENT_CSUM_KEY;
1674
1675         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1676                                 &key, &path, 0, 0);
1677         if (ret < 0)
1678                 goto out;
1679         if (ret > 0 && path.slots[0] > 0) {
1680                 leaf = path.nodes[0];
1681                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1682                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1683                     key.type == BTRFS_EXTENT_CSUM_KEY)
1684                         path.slots[0]--;
1685         }
1686
1687         while (len > 0) {
1688                 leaf = path.nodes[0];
1689                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1690                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1691                         if (ret > 0)
1692                                 break;
1693                         else if (ret < 0)
1694                                 goto out;
1695                         leaf = path.nodes[0];
1696                 }
1697
1698                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1699                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1700                     key.type != BTRFS_EXTENT_CSUM_KEY)
1701                         break;
1702
1703                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1704                 if (key.offset >= start + len)
1705                         break;
1706
1707                 if (key.offset > start)
1708                         start = key.offset;
1709
1710                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1711                 csum_end = key.offset + (size / csum_size) *
1712                            root->fs_info->sectorsize;
1713                 if (csum_end > start) {
1714                         size = min(csum_end - start, len);
1715                         len -= size;
1716                         start += size;
1717                         *found += size;
1718                 }
1719
1720                 path.slots[0]++;
1721         }
1722 out:
1723         btrfs_release_path(&path);
1724         if (ret < 0)
1725                 return ret;
1726         return 0;
1727 }
1728
1729 static int process_file_extent(struct btrfs_root *root,
1730                                 struct extent_buffer *eb,
1731                                 int slot, struct btrfs_key *key,
1732                                 struct shared_node *active_node)
1733 {
1734         struct inode_record *rec;
1735         struct btrfs_file_extent_item *fi;
1736         u64 num_bytes = 0;
1737         u64 disk_bytenr = 0;
1738         u64 extent_offset = 0;
1739         u64 mask = root->fs_info->sectorsize - 1;
1740         int extent_type;
1741         int ret;
1742
1743         rec = active_node->current;
1744         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1745         rec->found_file_extent = 1;
1746
1747         if (rec->extent_start == (u64)-1) {
1748                 rec->extent_start = key->offset;
1749                 rec->extent_end = key->offset;
1750         }
1751
1752         if (rec->extent_end > key->offset)
1753                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1754         else if (rec->extent_end < key->offset) {
1755                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1756                                            key->offset - rec->extent_end);
1757                 if (ret < 0)
1758                         return ret;
1759         }
1760
1761         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1762         extent_type = btrfs_file_extent_type(eb, fi);
1763
1764         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1765                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1766                 if (num_bytes == 0)
1767                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1768                 rec->found_size += num_bytes;
1769                 num_bytes = (num_bytes + mask) & ~mask;
1770         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1771                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1772                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1773                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1774                 extent_offset = btrfs_file_extent_offset(eb, fi);
1775                 if (num_bytes == 0 || (num_bytes & mask))
1776                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777                 if (num_bytes + extent_offset >
1778                     btrfs_file_extent_ram_bytes(eb, fi))
1779                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1780                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1781                     (btrfs_file_extent_compression(eb, fi) ||
1782                      btrfs_file_extent_encryption(eb, fi) ||
1783                      btrfs_file_extent_other_encoding(eb, fi)))
1784                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1785                 if (disk_bytenr > 0)
1786                         rec->found_size += num_bytes;
1787         } else {
1788                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1789         }
1790         rec->extent_end = key->offset + num_bytes;
1791
1792         /*
1793          * The data reloc tree will copy full extents into its inode and then
1794          * copy the corresponding csums.  Because the extent it copied could be
1795          * a preallocated extent that hasn't been written to yet there may be no
1796          * csums to copy, ergo we won't have csums for our file extent.  This is
1797          * ok so just don't bother checking csums if the inode belongs to the
1798          * data reloc tree.
1799          */
1800         if (disk_bytenr > 0 &&
1801             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1802                 u64 found;
1803                 if (btrfs_file_extent_compression(eb, fi))
1804                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1805                 else
1806                         disk_bytenr += extent_offset;
1807
1808                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1809                 if (ret < 0)
1810                         return ret;
1811                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1812                         if (found > 0)
1813                                 rec->found_csum_item = 1;
1814                         if (found < num_bytes)
1815                                 rec->some_csum_missing = 1;
1816                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1817                         if (found > 0)
1818                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1819                 }
1820         }
1821         return 0;
1822 }
1823
1824 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1825                             struct walk_control *wc)
1826 {
1827         struct btrfs_key key;
1828         u32 nritems;
1829         int i;
1830         int ret = 0;
1831         struct cache_tree *inode_cache;
1832         struct shared_node *active_node;
1833
1834         if (wc->root_level == wc->active_node &&
1835             btrfs_root_refs(&root->root_item) == 0)
1836                 return 0;
1837
1838         active_node = wc->nodes[wc->active_node];
1839         inode_cache = &active_node->inode_cache;
1840         nritems = btrfs_header_nritems(eb);
1841         for (i = 0; i < nritems; i++) {
1842                 btrfs_item_key_to_cpu(eb, &key, i);
1843
1844                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1845                         continue;
1846                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1847                         continue;
1848
1849                 if (active_node->current == NULL ||
1850                     active_node->current->ino < key.objectid) {
1851                         if (active_node->current) {
1852                                 active_node->current->checked = 1;
1853                                 maybe_free_inode_rec(inode_cache,
1854                                                      active_node->current);
1855                         }
1856                         active_node->current = get_inode_rec(inode_cache,
1857                                                              key.objectid, 1);
1858                         BUG_ON(IS_ERR(active_node->current));
1859                 }
1860                 switch (key.type) {
1861                 case BTRFS_DIR_ITEM_KEY:
1862                 case BTRFS_DIR_INDEX_KEY:
1863                         ret = process_dir_item(eb, i, &key, active_node);
1864                         break;
1865                 case BTRFS_INODE_REF_KEY:
1866                         ret = process_inode_ref(eb, i, &key, active_node);
1867                         break;
1868                 case BTRFS_INODE_EXTREF_KEY:
1869                         ret = process_inode_extref(eb, i, &key, active_node);
1870                         break;
1871                 case BTRFS_INODE_ITEM_KEY:
1872                         ret = process_inode_item(eb, i, &key, active_node);
1873                         break;
1874                 case BTRFS_EXTENT_DATA_KEY:
1875                         ret = process_file_extent(root, eb, i, &key,
1876                                                   active_node);
1877                         break;
1878                 default:
1879                         break;
1880                 };
1881         }
1882         return ret;
1883 }
1884
1885 struct node_refs {
1886         u64 bytenr[BTRFS_MAX_LEVEL];
1887         u64 refs[BTRFS_MAX_LEVEL];
1888         int need_check[BTRFS_MAX_LEVEL];
1889 };
1890
1891 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1892                              struct node_refs *nrefs, u64 level);
1893 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1894                             unsigned int ext_ref);
1895
1896 /*
1897  * Returns >0  Found error, not fatal, should continue
1898  * Returns <0  Fatal error, must exit the whole check
1899  * Returns 0   No errors found
1900  */
1901 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1902                                struct node_refs *nrefs, int *level, int ext_ref)
1903 {
1904         struct extent_buffer *cur = path->nodes[0];
1905         struct btrfs_key key;
1906         u64 cur_bytenr;
1907         u32 nritems;
1908         u64 first_ino = 0;
1909         int root_level = btrfs_header_level(root->node);
1910         int i;
1911         int ret = 0; /* Final return value */
1912         int err = 0; /* Positive error bitmap */
1913
1914         cur_bytenr = cur->start;
1915
1916         /* skip to first inode item or the first inode number change */
1917         nritems = btrfs_header_nritems(cur);
1918         for (i = 0; i < nritems; i++) {
1919                 btrfs_item_key_to_cpu(cur, &key, i);
1920                 if (i == 0)
1921                         first_ino = key.objectid;
1922                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1923                     (first_ino && first_ino != key.objectid))
1924                         break;
1925         }
1926         if (i == nritems) {
1927                 path->slots[0] = nritems;
1928                 return 0;
1929         }
1930         path->slots[0] = i;
1931
1932 again:
1933         err |= check_inode_item(root, path, ext_ref);
1934
1935         if (err & LAST_ITEM)
1936                 goto out;
1937
1938         /* still have inode items in thie leaf */
1939         if (cur->start == cur_bytenr)
1940                 goto again;
1941
1942         /*
1943          * we have switched to another leaf, above nodes may
1944          * have changed, here walk down the path, if a node
1945          * or leaf is shared, check whether we can skip this
1946          * node or leaf.
1947          */
1948         for (i = root_level; i >= 0; i--) {
1949                 if (path->nodes[i]->start == nrefs->bytenr[i])
1950                         continue;
1951
1952                 ret = update_nodes_refs(root,
1953                                 path->nodes[i]->start,
1954                                 nrefs, i);
1955                 if (ret)
1956                         goto out;
1957
1958                 if (!nrefs->need_check[i]) {
1959                         *level += 1;
1960                         break;
1961                 }
1962         }
1963
1964         for (i = 0; i < *level; i++) {
1965                 free_extent_buffer(path->nodes[i]);
1966                 path->nodes[i] = NULL;
1967         }
1968 out:
1969         err &= ~LAST_ITEM;
1970         if (err && !ret)
1971                 ret = err;
1972         return ret;
1973 }
1974
1975 static void reada_walk_down(struct btrfs_root *root,
1976                             struct extent_buffer *node, int slot)
1977 {
1978         struct btrfs_fs_info *fs_info = root->fs_info;
1979         u64 bytenr;
1980         u64 ptr_gen;
1981         u32 nritems;
1982         u32 blocksize;
1983         int i;
1984         int level;
1985
1986         level = btrfs_header_level(node);
1987         if (level != 1)
1988                 return;
1989
1990         nritems = btrfs_header_nritems(node);
1991         blocksize = fs_info->nodesize;
1992         for (i = slot; i < nritems; i++) {
1993                 bytenr = btrfs_node_blockptr(node, i);
1994                 ptr_gen = btrfs_node_ptr_generation(node, i);
1995                 readahead_tree_block(fs_info, bytenr, blocksize, ptr_gen);
1996         }
1997 }
1998
1999 /*
2000  * Check the child node/leaf by the following condition:
2001  * 1. the first item key of the node/leaf should be the same with the one
2002  *    in parent.
2003  * 2. block in parent node should match the child node/leaf.
2004  * 3. generation of parent node and child's header should be consistent.
2005  *
2006  * Or the child node/leaf pointed by the key in parent is not valid.
2007  *
2008  * We hope to check leaf owner too, but since subvol may share leaves,
2009  * which makes leaf owner check not so strong, key check should be
2010  * sufficient enough for that case.
2011  */
2012 static int check_child_node(struct extent_buffer *parent, int slot,
2013                             struct extent_buffer *child)
2014 {
2015         struct btrfs_key parent_key;
2016         struct btrfs_key child_key;
2017         int ret = 0;
2018
2019         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2020         if (btrfs_header_level(child) == 0)
2021                 btrfs_item_key_to_cpu(child, &child_key, 0);
2022         else
2023                 btrfs_node_key_to_cpu(child, &child_key, 0);
2024
2025         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2026                 ret = -EINVAL;
2027                 fprintf(stderr,
2028                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2029                         parent_key.objectid, parent_key.type, parent_key.offset,
2030                         child_key.objectid, child_key.type, child_key.offset);
2031         }
2032         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2033                 ret = -EINVAL;
2034                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2035                         btrfs_node_blockptr(parent, slot),
2036                         btrfs_header_bytenr(child));
2037         }
2038         if (btrfs_node_ptr_generation(parent, slot) !=
2039             btrfs_header_generation(child)) {
2040                 ret = -EINVAL;
2041                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2042                         btrfs_header_generation(child),
2043                         btrfs_node_ptr_generation(parent, slot));
2044         }
2045         return ret;
2046 }
2047
2048 /*
2049  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2050  * in every fs or file tree check. Here we find its all root ids, and only check
2051  * it in the fs or file tree which has the smallest root id.
2052  */
2053 static int need_check(struct btrfs_root *root, struct ulist *roots)
2054 {
2055         struct rb_node *node;
2056         struct ulist_node *u;
2057
2058         if (roots->nnodes == 1)
2059                 return 1;
2060
2061         node = rb_first(&roots->root);
2062         u = rb_entry(node, struct ulist_node, rb_node);
2063         /*
2064          * current root id is not smallest, we skip it and let it be checked
2065          * in the fs or file tree who hash the smallest root id.
2066          */
2067         if (root->objectid != u->val)
2068                 return 0;
2069
2070         return 1;
2071 }
2072
2073 /*
2074  * for a tree node or leaf, we record its reference count, so later if we still
2075  * process this node or leaf, don't need to compute its reference count again.
2076  */
2077 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2078                              struct node_refs *nrefs, u64 level)
2079 {
2080         int check, ret;
2081         u64 refs;
2082         struct ulist *roots;
2083
2084         if (nrefs->bytenr[level] != bytenr) {
2085                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2086                                        level, 1, &refs, NULL);
2087                 if (ret < 0)
2088                         return ret;
2089
2090                 nrefs->bytenr[level] = bytenr;
2091                 nrefs->refs[level] = refs;
2092                 if (refs > 1) {
2093                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2094                                                    0, &roots);
2095                         if (ret)
2096                                 return -EIO;
2097
2098                         check = need_check(root, roots);
2099                         ulist_free(roots);
2100                         nrefs->need_check[level] = check;
2101                 } else {
2102                         nrefs->need_check[level] = 1;
2103                 }
2104         }
2105
2106         return 0;
2107 }
2108
2109 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2110                           struct walk_control *wc, int *level,
2111                           struct node_refs *nrefs)
2112 {
2113         enum btrfs_tree_block_status status;
2114         u64 bytenr;
2115         u64 ptr_gen;
2116         struct btrfs_fs_info *fs_info = root->fs_info;
2117         struct extent_buffer *next;
2118         struct extent_buffer *cur;
2119         u32 blocksize;
2120         int ret, err = 0;
2121         u64 refs;
2122
2123         WARN_ON(*level < 0);
2124         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2125
2126         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2127                 refs = nrefs->refs[*level];
2128                 ret = 0;
2129         } else {
2130                 ret = btrfs_lookup_extent_info(NULL, root,
2131                                        path->nodes[*level]->start,
2132                                        *level, 1, &refs, NULL);
2133                 if (ret < 0) {
2134                         err = ret;
2135                         goto out;
2136                 }
2137                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2138                 nrefs->refs[*level] = refs;
2139         }
2140
2141         if (refs > 1) {
2142                 ret = enter_shared_node(root, path->nodes[*level]->start,
2143                                         refs, wc, *level);
2144                 if (ret > 0) {
2145                         err = ret;
2146                         goto out;
2147                 }
2148         }
2149
2150         while (*level >= 0) {
2151                 WARN_ON(*level < 0);
2152                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2153                 cur = path->nodes[*level];
2154
2155                 if (btrfs_header_level(cur) != *level)
2156                         WARN_ON(1);
2157
2158                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2159                         break;
2160                 if (*level == 0) {
2161                         ret = process_one_leaf(root, cur, wc);
2162                         if (ret < 0)
2163                                 err = ret;
2164                         break;
2165                 }
2166                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2167                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2168                 blocksize = fs_info->nodesize;
2169
2170                 if (bytenr == nrefs->bytenr[*level - 1]) {
2171                         refs = nrefs->refs[*level - 1];
2172                 } else {
2173                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174                                         *level - 1, 1, &refs, NULL);
2175                         if (ret < 0) {
2176                                 refs = 0;
2177                         } else {
2178                                 nrefs->bytenr[*level - 1] = bytenr;
2179                                 nrefs->refs[*level - 1] = refs;
2180                         }
2181                 }
2182
2183                 if (refs > 1) {
2184                         ret = enter_shared_node(root, bytenr, refs,
2185                                                 wc, *level - 1);
2186                         if (ret > 0) {
2187                                 path->slots[*level]++;
2188                                 continue;
2189                         }
2190                 }
2191
2192                 next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
2193                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2194                         free_extent_buffer(next);
2195                         reada_walk_down(root, cur, path->slots[*level]);
2196                         next = read_tree_block(root->fs_info, bytenr, blocksize,
2197                                                ptr_gen);
2198                         if (!extent_buffer_uptodate(next)) {
2199                                 struct btrfs_key node_key;
2200
2201                                 btrfs_node_key_to_cpu(path->nodes[*level],
2202                                                       &node_key,
2203                                                       path->slots[*level]);
2204                                 btrfs_add_corrupt_extent_record(root->fs_info,
2205                                                 &node_key,
2206                                                 path->nodes[*level]->start,
2207                                                 root->fs_info->nodesize,
2208                                                 *level);
2209                                 err = -EIO;
2210                                 goto out;
2211                         }
2212                 }
2213
2214                 ret = check_child_node(cur, path->slots[*level], next);
2215                 if (ret) {
2216                         free_extent_buffer(next);
2217                         err = ret;
2218                         goto out;
2219                 }
2220
2221                 if (btrfs_is_leaf(next))
2222                         status = btrfs_check_leaf(root, NULL, next);
2223                 else
2224                         status = btrfs_check_node(root, NULL, next);
2225                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2226                         free_extent_buffer(next);
2227                         err = -EIO;
2228                         goto out;
2229                 }
2230
2231                 *level = *level - 1;
2232                 free_extent_buffer(path->nodes[*level]);
2233                 path->nodes[*level] = next;
2234                 path->slots[*level] = 0;
2235         }
2236 out:
2237         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2238         return err;
2239 }
2240
2241 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2242                             unsigned int ext_ref);
2243
2244 /*
2245  * Returns >0  Found error, should continue
2246  * Returns <0  Fatal error, must exit the whole check
2247  * Returns 0   No errors found
2248  */
2249 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2250                              int *level, struct node_refs *nrefs, int ext_ref)
2251 {
2252         enum btrfs_tree_block_status status;
2253         u64 bytenr;
2254         u64 ptr_gen;
2255         struct btrfs_fs_info *fs_info = root->fs_info;
2256         struct extent_buffer *next;
2257         struct extent_buffer *cur;
2258         u32 blocksize;
2259         int ret;
2260
2261         WARN_ON(*level < 0);
2262         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2263
2264         ret = update_nodes_refs(root, path->nodes[*level]->start,
2265                                 nrefs, *level);
2266         if (ret < 0)
2267                 return ret;
2268
2269         while (*level >= 0) {
2270                 WARN_ON(*level < 0);
2271                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2272                 cur = path->nodes[*level];
2273
2274                 if (btrfs_header_level(cur) != *level)
2275                         WARN_ON(1);
2276
2277                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2278                         break;
2279                 /* Don't forgot to check leaf/node validation */
2280                 if (*level == 0) {
2281                         ret = btrfs_check_leaf(root, NULL, cur);
2282                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2283                                 ret = -EIO;
2284                                 break;
2285                         }
2286                         ret = process_one_leaf_v2(root, path, nrefs,
2287                                                   level, ext_ref);
2288                         break;
2289                 } else {
2290                         ret = btrfs_check_node(root, NULL, cur);
2291                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2292                                 ret = -EIO;
2293                                 break;
2294                         }
2295                 }
2296                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2297                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2298                 blocksize = fs_info->nodesize;
2299
2300                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2301                 if (ret)
2302                         break;
2303                 if (!nrefs->need_check[*level - 1]) {
2304                         path->slots[*level]++;
2305                         continue;
2306                 }
2307
2308                 next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
2309                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2310                         free_extent_buffer(next);
2311                         reada_walk_down(root, cur, path->slots[*level]);
2312                         next = read_tree_block(fs_info, bytenr, blocksize,
2313                                                ptr_gen);
2314                         if (!extent_buffer_uptodate(next)) {
2315                                 struct btrfs_key node_key;
2316
2317                                 btrfs_node_key_to_cpu(path->nodes[*level],
2318                                                       &node_key,
2319                                                       path->slots[*level]);
2320                                 btrfs_add_corrupt_extent_record(fs_info,
2321                                                 &node_key,
2322                                                 path->nodes[*level]->start,
2323                                                 fs_info->nodesize,
2324                                                 *level);
2325                                 ret = -EIO;
2326                                 break;
2327                         }
2328                 }
2329
2330                 ret = check_child_node(cur, path->slots[*level], next);
2331                 if (ret < 0) 
2332                         break;
2333
2334                 if (btrfs_is_leaf(next))
2335                         status = btrfs_check_leaf(root, NULL, next);
2336                 else
2337                         status = btrfs_check_node(root, NULL, next);
2338                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2339                         free_extent_buffer(next);
2340                         ret = -EIO;
2341                         break;
2342                 }
2343
2344                 *level = *level - 1;
2345                 free_extent_buffer(path->nodes[*level]);
2346                 path->nodes[*level] = next;
2347                 path->slots[*level] = 0;
2348         }
2349         return ret;
2350 }
2351
2352 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2353                         struct walk_control *wc, int *level)
2354 {
2355         int i;
2356         struct extent_buffer *leaf;
2357
2358         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2359                 leaf = path->nodes[i];
2360                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2361                         path->slots[i]++;
2362                         *level = i;
2363                         return 0;
2364                 } else {
2365                         free_extent_buffer(path->nodes[*level]);
2366                         path->nodes[*level] = NULL;
2367                         BUG_ON(*level > wc->active_node);
2368                         if (*level == wc->active_node)
2369                                 leave_shared_node(root, wc, *level);
2370                         *level = i + 1;
2371                 }
2372         }
2373         return 1;
2374 }
2375
2376 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2377                            int *level)
2378 {
2379         int i;
2380         struct extent_buffer *leaf;
2381
2382         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2383                 leaf = path->nodes[i];
2384                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2385                         path->slots[i]++;
2386                         *level = i;
2387                         return 0;
2388                 } else {
2389                         free_extent_buffer(path->nodes[*level]);
2390                         path->nodes[*level] = NULL;
2391                         *level = i + 1;
2392                 }
2393         }
2394         return 1;
2395 }
2396
2397 static int check_root_dir(struct inode_record *rec)
2398 {
2399         struct inode_backref *backref;
2400         int ret = -1;
2401
2402         if (!rec->found_inode_item || rec->errors)
2403                 goto out;
2404         if (rec->nlink != 1 || rec->found_link != 0)
2405                 goto out;
2406         if (list_empty(&rec->backrefs))
2407                 goto out;
2408         backref = to_inode_backref(rec->backrefs.next);
2409         if (!backref->found_inode_ref)
2410                 goto out;
2411         if (backref->index != 0 || backref->namelen != 2 ||
2412             memcmp(backref->name, "..", 2))
2413                 goto out;
2414         if (backref->found_dir_index || backref->found_dir_item)
2415                 goto out;
2416         ret = 0;
2417 out:
2418         return ret;
2419 }
2420
2421 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2422                               struct btrfs_root *root, struct btrfs_path *path,
2423                               struct inode_record *rec)
2424 {
2425         struct btrfs_inode_item *ei;
2426         struct btrfs_key key;
2427         int ret;
2428
2429         key.objectid = rec->ino;
2430         key.type = BTRFS_INODE_ITEM_KEY;
2431         key.offset = (u64)-1;
2432
2433         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2434         if (ret < 0)
2435                 goto out;
2436         if (ret) {
2437                 if (!path->slots[0]) {
2438                         ret = -ENOENT;
2439                         goto out;
2440                 }
2441                 path->slots[0]--;
2442                 ret = 0;
2443         }
2444         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2445         if (key.objectid != rec->ino) {
2446                 ret = -ENOENT;
2447                 goto out;
2448         }
2449
2450         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2451                             struct btrfs_inode_item);
2452         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2453         btrfs_mark_buffer_dirty(path->nodes[0]);
2454         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2455         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2456                root->root_key.objectid);
2457 out:
2458         btrfs_release_path(path);
2459         return ret;
2460 }
2461
2462 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2463                                     struct btrfs_root *root,
2464                                     struct btrfs_path *path,
2465                                     struct inode_record *rec)
2466 {
2467         int ret;
2468
2469         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2470         btrfs_release_path(path);
2471         if (!ret)
2472                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2473         return ret;
2474 }
2475
2476 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2477                                struct btrfs_root *root,
2478                                struct btrfs_path *path,
2479                                struct inode_record *rec)
2480 {
2481         struct btrfs_inode_item *ei;
2482         struct btrfs_key key;
2483         int ret = 0;
2484
2485         key.objectid = rec->ino;
2486         key.type = BTRFS_INODE_ITEM_KEY;
2487         key.offset = 0;
2488
2489         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2490         if (ret) {
2491                 if (ret > 0)
2492                         ret = -ENOENT;
2493                 goto out;
2494         }
2495
2496         /* Since ret == 0, no need to check anything */
2497         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2498                             struct btrfs_inode_item);
2499         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2500         btrfs_mark_buffer_dirty(path->nodes[0]);
2501         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2502         printf("reset nbytes for ino %llu root %llu\n",
2503                rec->ino, root->root_key.objectid);
2504 out:
2505         btrfs_release_path(path);
2506         return ret;
2507 }
2508
2509 static int add_missing_dir_index(struct btrfs_root *root,
2510                                  struct cache_tree *inode_cache,
2511                                  struct inode_record *rec,
2512                                  struct inode_backref *backref)
2513 {
2514         struct btrfs_path path;
2515         struct btrfs_trans_handle *trans;
2516         struct btrfs_dir_item *dir_item;
2517         struct extent_buffer *leaf;
2518         struct btrfs_key key;
2519         struct btrfs_disk_key disk_key;
2520         struct inode_record *dir_rec;
2521         unsigned long name_ptr;
2522         u32 data_size = sizeof(*dir_item) + backref->namelen;
2523         int ret;
2524
2525         trans = btrfs_start_transaction(root, 1);
2526         if (IS_ERR(trans))
2527                 return PTR_ERR(trans);
2528
2529         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2530                 (unsigned long long)rec->ino);
2531
2532         btrfs_init_path(&path);
2533         key.objectid = backref->dir;
2534         key.type = BTRFS_DIR_INDEX_KEY;
2535         key.offset = backref->index;
2536         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2537         BUG_ON(ret);
2538
2539         leaf = path.nodes[0];
2540         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2541
2542         disk_key.objectid = cpu_to_le64(rec->ino);
2543         disk_key.type = BTRFS_INODE_ITEM_KEY;
2544         disk_key.offset = 0;
2545
2546         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2547         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2548         btrfs_set_dir_data_len(leaf, dir_item, 0);
2549         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2550         name_ptr = (unsigned long)(dir_item + 1);
2551         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2552         btrfs_mark_buffer_dirty(leaf);
2553         btrfs_release_path(&path);
2554         btrfs_commit_transaction(trans, root);
2555
2556         backref->found_dir_index = 1;
2557         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2558         BUG_ON(IS_ERR(dir_rec));
2559         if (!dir_rec)
2560                 return 0;
2561         dir_rec->found_size += backref->namelen;
2562         if (dir_rec->found_size == dir_rec->isize &&
2563             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2564                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2565         if (dir_rec->found_size != dir_rec->isize)
2566                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2567
2568         return 0;
2569 }
2570
2571 static int delete_dir_index(struct btrfs_root *root,
2572                             struct inode_backref *backref)
2573 {
2574         struct btrfs_trans_handle *trans;
2575         struct btrfs_dir_item *di;
2576         struct btrfs_path path;
2577         int ret = 0;
2578
2579         trans = btrfs_start_transaction(root, 1);
2580         if (IS_ERR(trans))
2581                 return PTR_ERR(trans);
2582
2583         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2584                 (unsigned long long)backref->dir,
2585                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2586                 (unsigned long long)root->objectid);
2587
2588         btrfs_init_path(&path);
2589         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2590                                     backref->name, backref->namelen,
2591                                     backref->index, -1);
2592         if (IS_ERR(di)) {
2593                 ret = PTR_ERR(di);
2594                 btrfs_release_path(&path);
2595                 btrfs_commit_transaction(trans, root);
2596                 if (ret == -ENOENT)
2597                         return 0;
2598                 return ret;
2599         }
2600
2601         if (!di)
2602                 ret = btrfs_del_item(trans, root, &path);
2603         else
2604                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2605         BUG_ON(ret);
2606         btrfs_release_path(&path);
2607         btrfs_commit_transaction(trans, root);
2608         return ret;
2609 }
2610
2611 static int create_inode_item(struct btrfs_root *root,
2612                              struct inode_record *rec,
2613                              int root_dir)
2614 {
2615         struct btrfs_trans_handle *trans;
2616         struct btrfs_inode_item inode_item;
2617         time_t now = time(NULL);
2618         int ret;
2619
2620         trans = btrfs_start_transaction(root, 1);
2621         if (IS_ERR(trans)) {
2622                 ret = PTR_ERR(trans);
2623                 return ret;
2624         }
2625
2626         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2627                 "be incomplete, please check permissions and content after "
2628                 "the fsck completes.\n", (unsigned long long)root->objectid,
2629                 (unsigned long long)rec->ino);
2630
2631         memset(&inode_item, 0, sizeof(inode_item));
2632         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2633         if (root_dir)
2634                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2635         else
2636                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2637         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2638         if (rec->found_dir_item) {
2639                 if (rec->found_file_extent)
2640                         fprintf(stderr, "root %llu inode %llu has both a dir "
2641                                 "item and extents, unsure if it is a dir or a "
2642                                 "regular file so setting it as a directory\n",
2643                                 (unsigned long long)root->objectid,
2644                                 (unsigned long long)rec->ino);
2645                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2646                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2647         } else if (!rec->found_dir_item) {
2648                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2649                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2650         }
2651         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2652         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2653         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2654         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2655         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2656         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2657         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2658         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2659
2660         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2661         BUG_ON(ret);
2662         btrfs_commit_transaction(trans, root);
2663         return 0;
2664 }
2665
2666 static int repair_inode_backrefs(struct btrfs_root *root,
2667                                  struct inode_record *rec,
2668                                  struct cache_tree *inode_cache,
2669                                  int delete)
2670 {
2671         struct inode_backref *tmp, *backref;
2672         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2673         int ret = 0;
2674         int repaired = 0;
2675
2676         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2677                 if (!delete && rec->ino == root_dirid) {
2678                         if (!rec->found_inode_item) {
2679                                 ret = create_inode_item(root, rec, 1);
2680                                 if (ret)
2681                                         break;
2682                                 repaired++;
2683                         }
2684                 }
2685
2686                 /* Index 0 for root dir's are special, don't mess with it */
2687                 if (rec->ino == root_dirid && backref->index == 0)
2688                         continue;
2689
2690                 if (delete &&
2691                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2692                      (backref->found_dir_index && backref->found_inode_ref &&
2693                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2694                         ret = delete_dir_index(root, backref);
2695                         if (ret)
2696                                 break;
2697                         repaired++;
2698                         list_del(&backref->list);
2699                         free(backref);
2700                         continue;
2701                 }
2702
2703                 if (!delete && !backref->found_dir_index &&
2704                     backref->found_dir_item && backref->found_inode_ref) {
2705                         ret = add_missing_dir_index(root, inode_cache, rec,
2706                                                     backref);
2707                         if (ret)
2708                                 break;
2709                         repaired++;
2710                         if (backref->found_dir_item &&
2711                             backref->found_dir_index) {
2712                                 if (!backref->errors &&
2713                                     backref->found_inode_ref) {
2714                                         list_del(&backref->list);
2715                                         free(backref);
2716                                         continue;
2717                                 }
2718                         }
2719                 }
2720
2721                 if (!delete && (!backref->found_dir_index &&
2722                                 !backref->found_dir_item &&
2723                                 backref->found_inode_ref)) {
2724                         struct btrfs_trans_handle *trans;
2725                         struct btrfs_key location;
2726
2727                         ret = check_dir_conflict(root, backref->name,
2728                                                  backref->namelen,
2729                                                  backref->dir,
2730                                                  backref->index);
2731                         if (ret) {
2732                                 /*
2733                                  * let nlink fixing routine to handle it,
2734                                  * which can do it better.
2735                                  */
2736                                 ret = 0;
2737                                 break;
2738                         }
2739                         location.objectid = rec->ino;
2740                         location.type = BTRFS_INODE_ITEM_KEY;
2741                         location.offset = 0;
2742
2743                         trans = btrfs_start_transaction(root, 1);
2744                         if (IS_ERR(trans)) {
2745                                 ret = PTR_ERR(trans);
2746                                 break;
2747                         }
2748                         fprintf(stderr, "adding missing dir index/item pair "
2749                                 "for inode %llu\n",
2750                                 (unsigned long long)rec->ino);
2751                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2752                                                     backref->namelen,
2753                                                     backref->dir, &location,
2754                                                     imode_to_type(rec->imode),
2755                                                     backref->index);
2756                         BUG_ON(ret);
2757                         btrfs_commit_transaction(trans, root);
2758                         repaired++;
2759                 }
2760
2761                 if (!delete && (backref->found_inode_ref &&
2762                                 backref->found_dir_index &&
2763                                 backref->found_dir_item &&
2764                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2765                                 !rec->found_inode_item)) {
2766                         ret = create_inode_item(root, rec, 0);
2767                         if (ret)
2768                                 break;
2769                         repaired++;
2770                 }
2771
2772         }
2773         return ret ? ret : repaired;
2774 }
2775
2776 /*
2777  * To determine the file type for nlink/inode_item repair
2778  *
2779  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2780  * Return -ENOENT if file type is not found.
2781  */
2782 static int find_file_type(struct inode_record *rec, u8 *type)
2783 {
2784         struct inode_backref *backref;
2785
2786         /* For inode item recovered case */
2787         if (rec->found_inode_item) {
2788                 *type = imode_to_type(rec->imode);
2789                 return 0;
2790         }
2791
2792         list_for_each_entry(backref, &rec->backrefs, list) {
2793                 if (backref->found_dir_index || backref->found_dir_item) {
2794                         *type = backref->filetype;
2795                         return 0;
2796                 }
2797         }
2798         return -ENOENT;
2799 }
2800
2801 /*
2802  * To determine the file name for nlink repair
2803  *
2804  * Return 0 if file name is found, set name and namelen.
2805  * Return -ENOENT if file name is not found.
2806  */
2807 static int find_file_name(struct inode_record *rec,
2808                           char *name, int *namelen)
2809 {
2810         struct inode_backref *backref;
2811
2812         list_for_each_entry(backref, &rec->backrefs, list) {
2813                 if (backref->found_dir_index || backref->found_dir_item ||
2814                     backref->found_inode_ref) {
2815                         memcpy(name, backref->name, backref->namelen);
2816                         *namelen = backref->namelen;
2817                         return 0;
2818                 }
2819         }
2820         return -ENOENT;
2821 }
2822
2823 /* Reset the nlink of the inode to the correct one */
2824 static int reset_nlink(struct btrfs_trans_handle *trans,
2825                        struct btrfs_root *root,
2826                        struct btrfs_path *path,
2827                        struct inode_record *rec)
2828 {
2829         struct inode_backref *backref;
2830         struct inode_backref *tmp;
2831         struct btrfs_key key;
2832         struct btrfs_inode_item *inode_item;
2833         int ret = 0;
2834
2835         /* We don't believe this either, reset it and iterate backref */
2836         rec->found_link = 0;
2837
2838         /* Remove all backref including the valid ones */
2839         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2840                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2841                                    backref->index, backref->name,
2842                                    backref->namelen, 0);
2843                 if (ret < 0)
2844                         goto out;
2845
2846                 /* remove invalid backref, so it won't be added back */
2847                 if (!(backref->found_dir_index &&
2848                       backref->found_dir_item &&
2849                       backref->found_inode_ref)) {
2850                         list_del(&backref->list);
2851                         free(backref);
2852                 } else {
2853                         rec->found_link++;
2854                 }
2855         }
2856
2857         /* Set nlink to 0 */
2858         key.objectid = rec->ino;
2859         key.type = BTRFS_INODE_ITEM_KEY;
2860         key.offset = 0;
2861         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2862         if (ret < 0)
2863                 goto out;
2864         if (ret > 0) {
2865                 ret = -ENOENT;
2866                 goto out;
2867         }
2868         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2869                                     struct btrfs_inode_item);
2870         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2871         btrfs_mark_buffer_dirty(path->nodes[0]);
2872         btrfs_release_path(path);
2873
2874         /*
2875          * Add back valid inode_ref/dir_item/dir_index,
2876          * add_link() will handle the nlink inc, so new nlink must be correct
2877          */
2878         list_for_each_entry(backref, &rec->backrefs, list) {
2879                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2880                                      backref->name, backref->namelen,
2881                                      backref->filetype, &backref->index, 1);
2882                 if (ret < 0)
2883                         goto out;
2884         }
2885 out:
2886         btrfs_release_path(path);
2887         return ret;
2888 }
2889
2890 static int get_highest_inode(struct btrfs_trans_handle *trans,
2891                                 struct btrfs_root *root,
2892                                 struct btrfs_path *path,
2893                                 u64 *highest_ino)
2894 {
2895         struct btrfs_key key, found_key;
2896         int ret;
2897
2898         btrfs_init_path(path);
2899         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2900         key.offset = -1;
2901         key.type = BTRFS_INODE_ITEM_KEY;
2902         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2903         if (ret == 1) {
2904                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2905                                 path->slots[0] - 1);
2906                 *highest_ino = found_key.objectid;
2907                 ret = 0;
2908         }
2909         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2910                 ret = -EOVERFLOW;
2911         btrfs_release_path(path);
2912         return ret;
2913 }
2914
2915 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2916                                struct btrfs_root *root,
2917                                struct btrfs_path *path,
2918                                struct inode_record *rec)
2919 {
2920         char *dir_name = "lost+found";
2921         char namebuf[BTRFS_NAME_LEN] = {0};
2922         u64 lost_found_ino;
2923         u32 mode = 0700;
2924         u8 type = 0;
2925         int namelen = 0;
2926         int name_recovered = 0;
2927         int type_recovered = 0;
2928         int ret = 0;
2929
2930         /*
2931          * Get file name and type first before these invalid inode ref
2932          * are deleted by remove_all_invalid_backref()
2933          */
2934         name_recovered = !find_file_name(rec, namebuf, &namelen);
2935         type_recovered = !find_file_type(rec, &type);
2936
2937         if (!name_recovered) {
2938                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2939                        rec->ino, rec->ino);
2940                 namelen = count_digits(rec->ino);
2941                 sprintf(namebuf, "%llu", rec->ino);
2942                 name_recovered = 1;
2943         }
2944         if (!type_recovered) {
2945                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2946                        rec->ino);
2947                 type = BTRFS_FT_REG_FILE;
2948                 type_recovered = 1;
2949         }
2950
2951         ret = reset_nlink(trans, root, path, rec);
2952         if (ret < 0) {
2953                 fprintf(stderr,
2954                         "Failed to reset nlink for inode %llu: %s\n",
2955                         rec->ino, strerror(-ret));
2956                 goto out;
2957         }
2958
2959         if (rec->found_link == 0) {
2960                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2961                 if (ret < 0)
2962                         goto out;
2963                 lost_found_ino++;
2964                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2965                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2966                                   mode);
2967                 if (ret < 0) {
2968                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2969                                 dir_name, strerror(-ret));
2970                         goto out;
2971                 }
2972                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2973                                      namebuf, namelen, type, NULL, 1);
2974                 /*
2975                  * Add ".INO" suffix several times to handle case where
2976                  * "FILENAME.INO" is already taken by another file.
2977                  */
2978                 while (ret == -EEXIST) {
2979                         /*
2980                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2981                          */
2982                         if (namelen + count_digits(rec->ino) + 1 >
2983                             BTRFS_NAME_LEN) {
2984                                 ret = -EFBIG;
2985                                 goto out;
2986                         }
2987                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2988                                  ".%llu", rec->ino);
2989                         namelen += count_digits(rec->ino) + 1;
2990                         ret = btrfs_add_link(trans, root, rec->ino,
2991                                              lost_found_ino, namebuf,
2992                                              namelen, type, NULL, 1);
2993                 }
2994                 if (ret < 0) {
2995                         fprintf(stderr,
2996                                 "Failed to link the inode %llu to %s dir: %s\n",
2997                                 rec->ino, dir_name, strerror(-ret));
2998                         goto out;
2999                 }
3000                 /*
3001                  * Just increase the found_link, don't actually add the
3002                  * backref. This will make things easier and this inode
3003                  * record will be freed after the repair is done.
3004                  * So fsck will not report problem about this inode.
3005                  */
3006                 rec->found_link++;
3007                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3008                        namelen, namebuf, dir_name);
3009         }
3010         printf("Fixed the nlink of inode %llu\n", rec->ino);
3011 out:
3012         /*
3013          * Clear the flag anyway, or we will loop forever for the same inode
3014          * as it will not be removed from the bad inode list and the dead loop
3015          * happens.
3016          */
3017         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3018         btrfs_release_path(path);
3019         return ret;
3020 }
3021
3022 /*
3023  * Check if there is any normal(reg or prealloc) file extent for given
3024  * ino.
3025  * This is used to determine the file type when neither its dir_index/item or
3026  * inode_item exists.
3027  *
3028  * This will *NOT* report error, if any error happens, just consider it does
3029  * not have any normal file extent.
3030  */
3031 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3032 {
3033         struct btrfs_path path;
3034         struct btrfs_key key;
3035         struct btrfs_key found_key;
3036         struct btrfs_file_extent_item *fi;
3037         u8 type;
3038         int ret = 0;
3039
3040         btrfs_init_path(&path);
3041         key.objectid = ino;
3042         key.type = BTRFS_EXTENT_DATA_KEY;
3043         key.offset = 0;
3044
3045         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3046         if (ret < 0) {
3047                 ret = 0;
3048                 goto out;
3049         }
3050         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3051                 ret = btrfs_next_leaf(root, &path);
3052                 if (ret) {
3053                         ret = 0;
3054                         goto out;
3055                 }
3056         }
3057         while (1) {
3058                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3059                                       path.slots[0]);
3060                 if (found_key.objectid != ino ||
3061                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3062                         break;
3063                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3064                                     struct btrfs_file_extent_item);
3065                 type = btrfs_file_extent_type(path.nodes[0], fi);
3066                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3067                         ret = 1;
3068                         goto out;
3069                 }
3070         }
3071 out:
3072         btrfs_release_path(&path);
3073         return ret;
3074 }
3075
3076 static u32 btrfs_type_to_imode(u8 type)
3077 {
3078         static u32 imode_by_btrfs_type[] = {
3079                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3080                 [BTRFS_FT_DIR]          = S_IFDIR,
3081                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3082                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3083                 [BTRFS_FT_FIFO]         = S_IFIFO,
3084                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3085                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3086         };
3087
3088         return imode_by_btrfs_type[(type)];
3089 }
3090
3091 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3092                                 struct btrfs_root *root,
3093                                 struct btrfs_path *path,
3094                                 struct inode_record *rec)
3095 {
3096         u8 filetype;
3097         u32 mode = 0700;
3098         int type_recovered = 0;
3099         int ret = 0;
3100
3101         printf("Trying to rebuild inode:%llu\n", rec->ino);
3102
3103         type_recovered = !find_file_type(rec, &filetype);
3104
3105         /*
3106          * Try to determine inode type if type not found.
3107          *
3108          * For found regular file extent, it must be FILE.
3109          * For found dir_item/index, it must be DIR.
3110          *
3111          * For undetermined one, use FILE as fallback.
3112          *
3113          * TODO:
3114          * 1. If found backref(inode_index/item is already handled) to it,
3115          *    it must be DIR.
3116          *    Need new inode-inode ref structure to allow search for that.
3117          */
3118         if (!type_recovered) {
3119                 if (rec->found_file_extent &&
3120                     find_normal_file_extent(root, rec->ino)) {
3121                         type_recovered = 1;
3122                         filetype = BTRFS_FT_REG_FILE;
3123                 } else if (rec->found_dir_item) {
3124                         type_recovered = 1;
3125                         filetype = BTRFS_FT_DIR;
3126                 } else if (!list_empty(&rec->orphan_extents)) {
3127                         type_recovered = 1;
3128                         filetype = BTRFS_FT_REG_FILE;
3129                 } else{
3130                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3131                                rec->ino);
3132                         type_recovered = 1;
3133                         filetype = BTRFS_FT_REG_FILE;
3134                 }
3135         }
3136
3137         ret = btrfs_new_inode(trans, root, rec->ino,
3138                               mode | btrfs_type_to_imode(filetype));
3139         if (ret < 0)
3140                 goto out;
3141
3142         /*
3143          * Here inode rebuild is done, we only rebuild the inode item,
3144          * don't repair the nlink(like move to lost+found).
3145          * That is the job of nlink repair.
3146          *
3147          * We just fill the record and return
3148          */
3149         rec->found_dir_item = 1;
3150         rec->imode = mode | btrfs_type_to_imode(filetype);
3151         rec->nlink = 0;
3152         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3153         /* Ensure the inode_nlinks repair function will be called */
3154         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3155 out:
3156         return ret;
3157 }
3158
3159 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3160                                       struct btrfs_root *root,
3161                                       struct btrfs_path *path,
3162                                       struct inode_record *rec)
3163 {
3164         struct orphan_data_extent *orphan;
3165         struct orphan_data_extent *tmp;
3166         int ret = 0;
3167
3168         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3169                 /*
3170                  * Check for conflicting file extents
3171                  *
3172                  * Here we don't know whether the extents is compressed or not,
3173                  * so we can only assume it not compressed nor data offset,
3174                  * and use its disk_len as extent length.
3175                  */
3176                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3177                                        orphan->offset, orphan->disk_len, 0);
3178                 btrfs_release_path(path);
3179                 if (ret < 0)
3180                         goto out;
3181                 if (!ret) {
3182                         fprintf(stderr,
3183                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3184                                 orphan->disk_bytenr, orphan->disk_len);
3185                         ret = btrfs_free_extent(trans,
3186                                         root->fs_info->extent_root,
3187                                         orphan->disk_bytenr, orphan->disk_len,
3188                                         0, root->objectid, orphan->objectid,
3189                                         orphan->offset);
3190                         if (ret < 0)
3191                                 goto out;
3192                 }
3193                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3194                                 orphan->offset, orphan->disk_bytenr,
3195                                 orphan->disk_len, orphan->disk_len);
3196                 if (ret < 0)
3197                         goto out;
3198
3199                 /* Update file size info */
3200                 rec->found_size += orphan->disk_len;
3201                 if (rec->found_size == rec->nbytes)
3202                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3203
3204                 /* Update the file extent hole info too */
3205                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3206                                            orphan->disk_len);
3207                 if (ret < 0)
3208                         goto out;
3209                 if (RB_EMPTY_ROOT(&rec->holes))
3210                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3211
3212                 list_del(&orphan->list);
3213                 free(orphan);
3214         }
3215         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3216 out:
3217         return ret;
3218 }
3219
3220 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3221                                         struct btrfs_root *root,
3222                                         struct btrfs_path *path,
3223                                         struct inode_record *rec)
3224 {
3225         struct rb_node *node;
3226         struct file_extent_hole *hole;
3227         int found = 0;
3228         int ret = 0;
3229
3230         node = rb_first(&rec->holes);
3231
3232         while (node) {
3233                 found = 1;
3234                 hole = rb_entry(node, struct file_extent_hole, node);
3235                 ret = btrfs_punch_hole(trans, root, rec->ino,
3236                                        hole->start, hole->len);
3237                 if (ret < 0)
3238                         goto out;
3239                 ret = del_file_extent_hole(&rec->holes, hole->start,
3240                                            hole->len);
3241                 if (ret < 0)
3242                         goto out;
3243                 if (RB_EMPTY_ROOT(&rec->holes))
3244                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3245                 node = rb_first(&rec->holes);
3246         }
3247         /* special case for a file losing all its file extent */
3248         if (!found) {
3249                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3250                                        round_up(rec->isize,
3251                                                 root->fs_info->sectorsize));
3252                 if (ret < 0)
3253                         goto out;
3254         }
3255         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3256                rec->ino, root->objectid);
3257 out:
3258         return ret;
3259 }
3260
3261 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3262 {
3263         struct btrfs_trans_handle *trans;
3264         struct btrfs_path path;
3265         int ret = 0;
3266
3267         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3268                              I_ERR_NO_ORPHAN_ITEM |
3269                              I_ERR_LINK_COUNT_WRONG |
3270                              I_ERR_NO_INODE_ITEM |
3271                              I_ERR_FILE_EXTENT_ORPHAN |
3272                              I_ERR_FILE_EXTENT_DISCOUNT|
3273                              I_ERR_FILE_NBYTES_WRONG)))
3274                 return rec->errors;
3275
3276         /*
3277          * For nlink repair, it may create a dir and add link, so
3278          * 2 for parent(256)'s dir_index and dir_item
3279          * 2 for lost+found dir's inode_item and inode_ref
3280          * 1 for the new inode_ref of the file
3281          * 2 for lost+found dir's dir_index and dir_item for the file
3282          */
3283         trans = btrfs_start_transaction(root, 7);
3284         if (IS_ERR(trans))
3285                 return PTR_ERR(trans);
3286
3287         btrfs_init_path(&path);
3288         if (rec->errors & I_ERR_NO_INODE_ITEM)
3289                 ret = repair_inode_no_item(trans, root, &path, rec);
3290         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3291                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3292         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3293                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3294         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3295                 ret = repair_inode_isize(trans, root, &path, rec);
3296         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3297                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3298         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3299                 ret = repair_inode_nlinks(trans, root, &path, rec);
3300         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3301                 ret = repair_inode_nbytes(trans, root, &path, rec);
3302         btrfs_commit_transaction(trans, root);
3303         btrfs_release_path(&path);
3304         return ret;
3305 }
3306
3307 static int check_inode_recs(struct btrfs_root *root,
3308                             struct cache_tree *inode_cache)
3309 {
3310         struct cache_extent *cache;
3311         struct ptr_node *node;
3312         struct inode_record *rec;
3313         struct inode_backref *backref;
3314         int stage = 0;
3315         int ret = 0;
3316         int err = 0;
3317         u64 error = 0;
3318         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3319
3320         if (btrfs_root_refs(&root->root_item) == 0) {
3321                 if (!cache_tree_empty(inode_cache))
3322                         fprintf(stderr, "warning line %d\n", __LINE__);
3323                 return 0;
3324         }
3325
3326         /*
3327          * We need to repair backrefs first because we could change some of the
3328          * errors in the inode recs.
3329          *
3330          * We also need to go through and delete invalid backrefs first and then
3331          * add the correct ones second.  We do this because we may get EEXIST
3332          * when adding back the correct index because we hadn't yet deleted the
3333          * invalid index.
3334          *
3335          * For example, if we were missing a dir index then the directories
3336          * isize would be wrong, so if we fixed the isize to what we thought it
3337          * would be and then fixed the backref we'd still have a invalid fs, so
3338          * we need to add back the dir index and then check to see if the isize
3339          * is still wrong.
3340          */
3341         while (stage < 3) {
3342                 stage++;
3343                 if (stage == 3 && !err)
3344                         break;
3345
3346                 cache = search_cache_extent(inode_cache, 0);
3347                 while (repair && cache) {
3348                         node = container_of(cache, struct ptr_node, cache);
3349                         rec = node->data;
3350                         cache = next_cache_extent(cache);
3351
3352                         /* Need to free everything up and rescan */
3353                         if (stage == 3) {
3354                                 remove_cache_extent(inode_cache, &node->cache);
3355                                 free(node);
3356                                 free_inode_rec(rec);
3357                                 continue;
3358                         }
3359
3360                         if (list_empty(&rec->backrefs))
3361                                 continue;
3362
3363                         ret = repair_inode_backrefs(root, rec, inode_cache,
3364                                                     stage == 1);
3365                         if (ret < 0) {
3366                                 err = ret;
3367                                 stage = 2;
3368                                 break;
3369                         } if (ret > 0) {
3370                                 err = -EAGAIN;
3371                         }
3372                 }
3373         }
3374         if (err)
3375                 return err;
3376
3377         rec = get_inode_rec(inode_cache, root_dirid, 0);
3378         BUG_ON(IS_ERR(rec));
3379         if (rec) {
3380                 ret = check_root_dir(rec);
3381                 if (ret) {
3382                         fprintf(stderr, "root %llu root dir %llu error\n",
3383                                 (unsigned long long)root->root_key.objectid,
3384                                 (unsigned long long)root_dirid);
3385                         print_inode_error(root, rec);
3386                         error++;
3387                 }
3388         } else {
3389                 if (repair) {
3390                         struct btrfs_trans_handle *trans;
3391
3392                         trans = btrfs_start_transaction(root, 1);
3393                         if (IS_ERR(trans)) {
3394                                 err = PTR_ERR(trans);
3395                                 return err;
3396                         }
3397
3398                         fprintf(stderr,
3399                                 "root %llu missing its root dir, recreating\n",
3400                                 (unsigned long long)root->objectid);
3401
3402                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3403                         BUG_ON(ret);
3404
3405                         btrfs_commit_transaction(trans, root);
3406                         return -EAGAIN;
3407                 }
3408
3409                 fprintf(stderr, "root %llu root dir %llu not found\n",
3410                         (unsigned long long)root->root_key.objectid,
3411                         (unsigned long long)root_dirid);
3412         }
3413
3414         while (1) {
3415                 cache = search_cache_extent(inode_cache, 0);
3416                 if (!cache)
3417                         break;
3418                 node = container_of(cache, struct ptr_node, cache);
3419                 rec = node->data;
3420                 remove_cache_extent(inode_cache, &node->cache);
3421                 free(node);
3422                 if (rec->ino == root_dirid ||
3423                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3424                         free_inode_rec(rec);
3425                         continue;
3426                 }
3427
3428                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3429                         ret = check_orphan_item(root, rec->ino);
3430                         if (ret == 0)
3431                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3432                         if (can_free_inode_rec(rec)) {
3433                                 free_inode_rec(rec);
3434                                 continue;
3435                         }
3436                 }
3437
3438                 if (!rec->found_inode_item)
3439                         rec->errors |= I_ERR_NO_INODE_ITEM;
3440                 if (rec->found_link != rec->nlink)
3441                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3442                 if (repair) {
3443                         ret = try_repair_inode(root, rec);
3444                         if (ret == 0 && can_free_inode_rec(rec)) {
3445                                 free_inode_rec(rec);
3446                                 continue;
3447                         }
3448                         ret = 0;
3449                 }
3450
3451                 if (!(repair && ret == 0))
3452                         error++;
3453                 print_inode_error(root, rec);
3454                 list_for_each_entry(backref, &rec->backrefs, list) {
3455                         if (!backref->found_dir_item)
3456                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3457                         if (!backref->found_dir_index)
3458                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3459                         if (!backref->found_inode_ref)
3460                                 backref->errors |= REF_ERR_NO_INODE_REF;
3461                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3462                                 " namelen %u name %s filetype %d errors %x",
3463                                 (unsigned long long)backref->dir,
3464                                 (unsigned long long)backref->index,
3465                                 backref->namelen, backref->name,
3466                                 backref->filetype, backref->errors);
3467                         print_ref_error(backref->errors);
3468                 }
3469                 free_inode_rec(rec);
3470         }
3471         return (error > 0) ? -1 : 0;
3472 }
3473
3474 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3475                                         u64 objectid)
3476 {
3477         struct cache_extent *cache;
3478         struct root_record *rec = NULL;
3479         int ret;
3480
3481         cache = lookup_cache_extent(root_cache, objectid, 1);
3482         if (cache) {
3483                 rec = container_of(cache, struct root_record, cache);
3484         } else {
3485                 rec = calloc(1, sizeof(*rec));
3486                 if (!rec)
3487                         return ERR_PTR(-ENOMEM);
3488                 rec->objectid = objectid;
3489                 INIT_LIST_HEAD(&rec->backrefs);
3490                 rec->cache.start = objectid;
3491                 rec->cache.size = 1;
3492
3493                 ret = insert_cache_extent(root_cache, &rec->cache);
3494                 if (ret)
3495                         return ERR_PTR(-EEXIST);
3496         }
3497         return rec;
3498 }
3499
3500 static struct root_backref *get_root_backref(struct root_record *rec,
3501                                              u64 ref_root, u64 dir, u64 index,
3502                                              const char *name, int namelen)
3503 {
3504         struct root_backref *backref;
3505
3506         list_for_each_entry(backref, &rec->backrefs, list) {
3507                 if (backref->ref_root != ref_root || backref->dir != dir ||
3508                     backref->namelen != namelen)
3509                         continue;
3510                 if (memcmp(name, backref->name, namelen))
3511                         continue;
3512                 return backref;
3513         }
3514
3515         backref = calloc(1, sizeof(*backref) + namelen + 1);
3516         if (!backref)
3517                 return NULL;
3518         backref->ref_root = ref_root;
3519         backref->dir = dir;
3520         backref->index = index;
3521         backref->namelen = namelen;
3522         memcpy(backref->name, name, namelen);
3523         backref->name[namelen] = '\0';
3524         list_add_tail(&backref->list, &rec->backrefs);
3525         return backref;
3526 }
3527
3528 static void free_root_record(struct cache_extent *cache)
3529 {
3530         struct root_record *rec;
3531         struct root_backref *backref;
3532
3533         rec = container_of(cache, struct root_record, cache);
3534         while (!list_empty(&rec->backrefs)) {
3535                 backref = to_root_backref(rec->backrefs.next);
3536                 list_del(&backref->list);
3537                 free(backref);
3538         }
3539
3540         free(rec);
3541 }
3542
3543 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3544
3545 static int add_root_backref(struct cache_tree *root_cache,
3546                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3547                             const char *name, int namelen,
3548                             int item_type, int errors)
3549 {
3550         struct root_record *rec;
3551         struct root_backref *backref;
3552
3553         rec = get_root_rec(root_cache, root_id);
3554         BUG_ON(IS_ERR(rec));
3555         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3556         BUG_ON(!backref);
3557
3558         backref->errors |= errors;
3559
3560         if (item_type != BTRFS_DIR_ITEM_KEY) {
3561                 if (backref->found_dir_index || backref->found_back_ref ||
3562                     backref->found_forward_ref) {
3563                         if (backref->index != index)
3564                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3565                 } else {
3566                         backref->index = index;
3567                 }
3568         }
3569
3570         if (item_type == BTRFS_DIR_ITEM_KEY) {
3571                 if (backref->found_forward_ref)
3572                         rec->found_ref++;
3573                 backref->found_dir_item = 1;
3574         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3575                 backref->found_dir_index = 1;
3576         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3577                 if (backref->found_forward_ref)
3578                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3579                 else if (backref->found_dir_item)
3580                         rec->found_ref++;
3581                 backref->found_forward_ref = 1;
3582         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3583                 if (backref->found_back_ref)
3584                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3585                 backref->found_back_ref = 1;
3586         } else {
3587                 BUG_ON(1);
3588         }
3589
3590         if (backref->found_forward_ref && backref->found_dir_item)
3591                 backref->reachable = 1;
3592         return 0;
3593 }
3594
3595 static int merge_root_recs(struct btrfs_root *root,
3596                            struct cache_tree *src_cache,
3597                            struct cache_tree *dst_cache)
3598 {
3599         struct cache_extent *cache;
3600         struct ptr_node *node;
3601         struct inode_record *rec;
3602         struct inode_backref *backref;
3603         int ret = 0;
3604
3605         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3606                 free_inode_recs_tree(src_cache);
3607                 return 0;
3608         }
3609
3610         while (1) {
3611                 cache = search_cache_extent(src_cache, 0);
3612                 if (!cache)
3613                         break;
3614                 node = container_of(cache, struct ptr_node, cache);
3615                 rec = node->data;
3616                 remove_cache_extent(src_cache, &node->cache);
3617                 free(node);
3618
3619                 ret = is_child_root(root, root->objectid, rec->ino);
3620                 if (ret < 0)
3621                         break;
3622                 else if (ret == 0)
3623                         goto skip;
3624
3625                 list_for_each_entry(backref, &rec->backrefs, list) {
3626                         BUG_ON(backref->found_inode_ref);
3627                         if (backref->found_dir_item)
3628                                 add_root_backref(dst_cache, rec->ino,
3629                                         root->root_key.objectid, backref->dir,
3630                                         backref->index, backref->name,
3631                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3632                                         backref->errors);
3633                         if (backref->found_dir_index)
3634                                 add_root_backref(dst_cache, rec->ino,
3635                                         root->root_key.objectid, backref->dir,
3636                                         backref->index, backref->name,
3637                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3638                                         backref->errors);
3639                 }
3640 skip:
3641                 free_inode_rec(rec);
3642         }
3643         if (ret < 0)
3644                 return ret;
3645         return 0;
3646 }
3647
3648 static int check_root_refs(struct btrfs_root *root,
3649                            struct cache_tree *root_cache)
3650 {
3651         struct root_record *rec;
3652         struct root_record *ref_root;
3653         struct root_backref *backref;
3654         struct cache_extent *cache;
3655         int loop = 1;
3656         int ret;
3657         int error;
3658         int errors = 0;
3659
3660         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3661         BUG_ON(IS_ERR(rec));
3662         rec->found_ref = 1;
3663
3664         /* fixme: this can not detect circular references */
3665         while (loop) {
3666                 loop = 0;
3667                 cache = search_cache_extent(root_cache, 0);
3668                 while (1) {
3669                         if (!cache)
3670                                 break;
3671                         rec = container_of(cache, struct root_record, cache);
3672                         cache = next_cache_extent(cache);
3673
3674                         if (rec->found_ref == 0)
3675                                 continue;
3676
3677                         list_for_each_entry(backref, &rec->backrefs, list) {
3678                                 if (!backref->reachable)
3679                                         continue;
3680
3681                                 ref_root = get_root_rec(root_cache,
3682                                                         backref->ref_root);
3683                                 BUG_ON(IS_ERR(ref_root));
3684                                 if (ref_root->found_ref > 0)
3685                                         continue;
3686
3687                                 backref->reachable = 0;
3688                                 rec->found_ref--;
3689                                 if (rec->found_ref == 0)
3690                                         loop = 1;
3691                         }
3692                 }
3693         }
3694
3695         cache = search_cache_extent(root_cache, 0);
3696         while (1) {
3697                 if (!cache)
3698                         break;
3699                 rec = container_of(cache, struct root_record, cache);
3700                 cache = next_cache_extent(cache);
3701
3702                 if (rec->found_ref == 0 &&
3703                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3704                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3705                         ret = check_orphan_item(root->fs_info->tree_root,
3706                                                 rec->objectid);
3707                         if (ret == 0)
3708                                 continue;
3709
3710                         /*
3711                          * If we don't have a root item then we likely just have
3712                          * a dir item in a snapshot for this root but no actual
3713                          * ref key or anything so it's meaningless.
3714                          */
3715                         if (!rec->found_root_item)
3716                                 continue;
3717                         errors++;
3718                         fprintf(stderr, "fs tree %llu not referenced\n",
3719                                 (unsigned long long)rec->objectid);
3720                 }
3721
3722                 error = 0;
3723                 if (rec->found_ref > 0 && !rec->found_root_item)
3724                         error = 1;
3725                 list_for_each_entry(backref, &rec->backrefs, list) {
3726                         if (!backref->found_dir_item)
3727                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3728                         if (!backref->found_dir_index)
3729                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3730                         if (!backref->found_back_ref)
3731                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3732                         if (!backref->found_forward_ref)
3733                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3734                         if (backref->reachable && backref->errors)
3735                                 error = 1;
3736                 }
3737                 if (!error)
3738                         continue;
3739
3740                 errors++;
3741                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3742                         (unsigned long long)rec->objectid, rec->found_ref,
3743                          rec->found_root_item ? "" : "not found");
3744
3745                 list_for_each_entry(backref, &rec->backrefs, list) {
3746                         if (!backref->reachable)
3747                                 continue;
3748                         if (!backref->errors && rec->found_root_item)
3749                                 continue;
3750                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3751                                 " index %llu namelen %u name %s errors %x\n",
3752                                 (unsigned long long)backref->ref_root,
3753                                 (unsigned long long)backref->dir,
3754                                 (unsigned long long)backref->index,
3755                                 backref->namelen, backref->name,
3756                                 backref->errors);
3757                         print_ref_error(backref->errors);
3758                 }
3759         }
3760         return errors > 0 ? 1 : 0;
3761 }
3762
3763 static int process_root_ref(struct extent_buffer *eb, int slot,
3764                             struct btrfs_key *key,
3765                             struct cache_tree *root_cache)
3766 {
3767         u64 dirid;
3768         u64 index;
3769         u32 len;
3770         u32 name_len;
3771         struct btrfs_root_ref *ref;
3772         char namebuf[BTRFS_NAME_LEN];
3773         int error;
3774
3775         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3776
3777         dirid = btrfs_root_ref_dirid(eb, ref);
3778         index = btrfs_root_ref_sequence(eb, ref);
3779         name_len = btrfs_root_ref_name_len(eb, ref);
3780
3781         if (name_len <= BTRFS_NAME_LEN) {
3782                 len = name_len;
3783                 error = 0;
3784         } else {
3785                 len = BTRFS_NAME_LEN;
3786                 error = REF_ERR_NAME_TOO_LONG;
3787         }
3788         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3789
3790         if (key->type == BTRFS_ROOT_REF_KEY) {
3791                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3792                                  index, namebuf, len, key->type, error);
3793         } else {
3794                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3795                                  index, namebuf, len, key->type, error);
3796         }
3797         return 0;
3798 }
3799
3800 static void free_corrupt_block(struct cache_extent *cache)
3801 {
3802         struct btrfs_corrupt_block *corrupt;
3803
3804         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3805         free(corrupt);
3806 }
3807
3808 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3809
3810 /*
3811  * Repair the btree of the given root.
3812  *
3813  * The fix is to remove the node key in corrupt_blocks cache_tree.
3814  * and rebalance the tree.
3815  * After the fix, the btree should be writeable.
3816  */
3817 static int repair_btree(struct btrfs_root *root,
3818                         struct cache_tree *corrupt_blocks)
3819 {
3820         struct btrfs_trans_handle *trans;
3821         struct btrfs_path path;
3822         struct btrfs_corrupt_block *corrupt;
3823         struct cache_extent *cache;
3824         struct btrfs_key key;
3825         u64 offset;
3826         int level;
3827         int ret = 0;
3828
3829         if (cache_tree_empty(corrupt_blocks))
3830                 return 0;
3831
3832         trans = btrfs_start_transaction(root, 1);
3833         if (IS_ERR(trans)) {
3834                 ret = PTR_ERR(trans);
3835                 fprintf(stderr, "Error starting transaction: %s\n",
3836                         strerror(-ret));
3837                 return ret;
3838         }
3839         btrfs_init_path(&path);
3840         cache = first_cache_extent(corrupt_blocks);
3841         while (cache) {
3842                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843                                        cache);
3844                 level = corrupt->level;
3845                 path.lowest_level = level;
3846                 key.objectid = corrupt->key.objectid;
3847                 key.type = corrupt->key.type;
3848                 key.offset = corrupt->key.offset;
3849
3850                 /*
3851                  * Here we don't want to do any tree balance, since it may
3852                  * cause a balance with corrupted brother leaf/node,
3853                  * so ins_len set to 0 here.
3854                  * Balance will be done after all corrupt node/leaf is deleted.
3855                  */
3856                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3857                 if (ret < 0)
3858                         goto out;
3859                 offset = btrfs_node_blockptr(path.nodes[level],
3860                                              path.slots[level]);
3861
3862                 /* Remove the ptr */
3863                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3864                 if (ret < 0)
3865                         goto out;
3866                 /*
3867                  * Remove the corresponding extent
3868                  * return value is not concerned.
3869                  */
3870                 btrfs_release_path(&path);
3871                 ret = btrfs_free_extent(trans, root, offset,
3872                                 root->fs_info->nodesize, 0,
3873                                 root->root_key.objectid, level - 1, 0);
3874                 cache = next_cache_extent(cache);
3875         }
3876
3877         /* Balance the btree using btrfs_search_slot() */
3878         cache = first_cache_extent(corrupt_blocks);
3879         while (cache) {
3880                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3881                                        cache);
3882                 memcpy(&key, &corrupt->key, sizeof(key));
3883                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3884                 if (ret < 0)
3885                         goto out;
3886                 /* return will always >0 since it won't find the item */
3887                 ret = 0;
3888                 btrfs_release_path(&path);
3889                 cache = next_cache_extent(cache);
3890         }
3891 out:
3892         btrfs_commit_transaction(trans, root);
3893         btrfs_release_path(&path);
3894         return ret;
3895 }
3896
3897 static int check_fs_root(struct btrfs_root *root,
3898                          struct cache_tree *root_cache,
3899                          struct walk_control *wc)
3900 {
3901         int ret = 0;
3902         int err = 0;
3903         int wret;
3904         int level;
3905         struct btrfs_path path;
3906         struct shared_node root_node;
3907         struct root_record *rec;
3908         struct btrfs_root_item *root_item = &root->root_item;
3909         struct cache_tree corrupt_blocks;
3910         struct orphan_data_extent *orphan;
3911         struct orphan_data_extent *tmp;
3912         enum btrfs_tree_block_status status;
3913         struct node_refs nrefs;
3914
3915         /*
3916          * Reuse the corrupt_block cache tree to record corrupted tree block
3917          *
3918          * Unlike the usage in extent tree check, here we do it in a per
3919          * fs/subvol tree base.
3920          */
3921         cache_tree_init(&corrupt_blocks);
3922         root->fs_info->corrupt_blocks = &corrupt_blocks;
3923
3924         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3925                 rec = get_root_rec(root_cache, root->root_key.objectid);
3926                 BUG_ON(IS_ERR(rec));
3927                 if (btrfs_root_refs(root_item) > 0)
3928                         rec->found_root_item = 1;
3929         }
3930
3931         btrfs_init_path(&path);
3932         memset(&root_node, 0, sizeof(root_node));
3933         cache_tree_init(&root_node.root_cache);
3934         cache_tree_init(&root_node.inode_cache);
3935         memset(&nrefs, 0, sizeof(nrefs));
3936
3937         /* Move the orphan extent record to corresponding inode_record */
3938         list_for_each_entry_safe(orphan, tmp,
3939                                  &root->orphan_data_extents, list) {
3940                 struct inode_record *inode;
3941
3942                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3943                                       1);
3944                 BUG_ON(IS_ERR(inode));
3945                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3946                 list_move(&orphan->list, &inode->orphan_extents);
3947         }
3948
3949         level = btrfs_header_level(root->node);
3950         memset(wc->nodes, 0, sizeof(wc->nodes));
3951         wc->nodes[level] = &root_node;
3952         wc->active_node = level;
3953         wc->root_level = level;
3954
3955         /* We may not have checked the root block, lets do that now */
3956         if (btrfs_is_leaf(root->node))
3957                 status = btrfs_check_leaf(root, NULL, root->node);
3958         else
3959                 status = btrfs_check_node(root, NULL, root->node);
3960         if (status != BTRFS_TREE_BLOCK_CLEAN)
3961                 return -EIO;
3962
3963         if (btrfs_root_refs(root_item) > 0 ||
3964             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3965                 path.nodes[level] = root->node;
3966                 extent_buffer_get(root->node);
3967                 path.slots[level] = 0;
3968         } else {
3969                 struct btrfs_key key;
3970                 struct btrfs_disk_key found_key;
3971
3972                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3973                 level = root_item->drop_level;
3974                 path.lowest_level = level;
3975                 if (level > btrfs_header_level(root->node) ||
3976                     level >= BTRFS_MAX_LEVEL) {
3977                         error("ignoring invalid drop level: %u", level);
3978                         goto skip_walking;
3979                 }
3980                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3981                 if (wret < 0)
3982                         goto skip_walking;
3983                 btrfs_node_key(path.nodes[level], &found_key,
3984                                 path.slots[level]);
3985                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3986                                         sizeof(found_key)));
3987         }
3988
3989         while (1) {
3990                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3991                 if (wret < 0)
3992                         ret = wret;
3993                 if (wret != 0)
3994                         break;
3995
3996                 wret = walk_up_tree(root, &path, wc, &level);
3997                 if (wret < 0)
3998                         ret = wret;
3999                 if (wret != 0)
4000                         break;
4001         }
4002 skip_walking:
4003         btrfs_release_path(&path);
4004
4005         if (!cache_tree_empty(&corrupt_blocks)) {
4006                 struct cache_extent *cache;
4007                 struct btrfs_corrupt_block *corrupt;
4008
4009                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4010                        root->root_key.objectid);
4011                 cache = first_cache_extent(&corrupt_blocks);
4012                 while (cache) {
4013                         corrupt = container_of(cache,
4014                                                struct btrfs_corrupt_block,
4015                                                cache);
4016                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4017                                cache->start, corrupt->level,
4018                                corrupt->key.objectid, corrupt->key.type,
4019                                corrupt->key.offset);
4020                         cache = next_cache_extent(cache);
4021                 }
4022                 if (repair) {
4023                         printf("Try to repair the btree for root %llu\n",
4024                                root->root_key.objectid);
4025                         ret = repair_btree(root, &corrupt_blocks);
4026                         if (ret < 0)
4027                                 fprintf(stderr, "Failed to repair btree: %s\n",
4028                                         strerror(-ret));
4029                         if (!ret)
4030                                 printf("Btree for root %llu is fixed\n",
4031                                        root->root_key.objectid);
4032                 }
4033         }
4034
4035         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4036         if (err < 0)
4037                 ret = err;
4038
4039         if (root_node.current) {
4040                 root_node.current->checked = 1;
4041                 maybe_free_inode_rec(&root_node.inode_cache,
4042                                 root_node.current);
4043         }
4044
4045         err = check_inode_recs(root, &root_node.inode_cache);
4046         if (!ret)
4047                 ret = err;
4048
4049         free_corrupt_blocks_tree(&corrupt_blocks);
4050         root->fs_info->corrupt_blocks = NULL;
4051         free_orphan_data_extents(&root->orphan_data_extents);
4052         return ret;
4053 }
4054
4055 static int fs_root_objectid(u64 objectid)
4056 {
4057         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4058             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4059                 return 1;
4060         return is_fstree(objectid);
4061 }
4062
4063 static int check_fs_roots(struct btrfs_root *root,
4064                           struct cache_tree *root_cache)
4065 {
4066         struct btrfs_path path;
4067         struct btrfs_key key;
4068         struct walk_control wc;
4069         struct extent_buffer *leaf, *tree_node;
4070         struct btrfs_root *tmp_root;
4071         struct btrfs_root *tree_root = root->fs_info->tree_root;
4072         int ret;
4073         int err = 0;
4074
4075         if (ctx.progress_enabled) {
4076                 ctx.tp = TASK_FS_ROOTS;
4077                 task_start(ctx.info);
4078         }
4079
4080         /*
4081          * Just in case we made any changes to the extent tree that weren't
4082          * reflected into the free space cache yet.
4083          */
4084         if (repair)
4085                 reset_cached_block_groups(root->fs_info);
4086         memset(&wc, 0, sizeof(wc));
4087         cache_tree_init(&wc.shared);
4088         btrfs_init_path(&path);
4089
4090 again:
4091         key.offset = 0;
4092         key.objectid = 0;
4093         key.type = BTRFS_ROOT_ITEM_KEY;
4094         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4095         if (ret < 0) {
4096                 err = 1;
4097                 goto out;
4098         }
4099         tree_node = tree_root->node;
4100         while (1) {
4101                 if (tree_node != tree_root->node) {
4102                         free_root_recs_tree(root_cache);
4103                         btrfs_release_path(&path);
4104                         goto again;
4105                 }
4106                 leaf = path.nodes[0];
4107                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4108                         ret = btrfs_next_leaf(tree_root, &path);
4109                         if (ret) {
4110                                 if (ret < 0)
4111                                         err = 1;
4112                                 break;
4113                         }
4114                         leaf = path.nodes[0];
4115                 }
4116                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4117                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4118                     fs_root_objectid(key.objectid)) {
4119                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4120                                 tmp_root = btrfs_read_fs_root_no_cache(
4121                                                 root->fs_info, &key);
4122                         } else {
4123                                 key.offset = (u64)-1;
4124                                 tmp_root = btrfs_read_fs_root(
4125                                                 root->fs_info, &key);
4126                         }
4127                         if (IS_ERR(tmp_root)) {
4128                                 err = 1;
4129                                 goto next;
4130                         }
4131                         ret = check_fs_root(tmp_root, root_cache, &wc);
4132                         if (ret == -EAGAIN) {
4133                                 free_root_recs_tree(root_cache);
4134                                 btrfs_release_path(&path);
4135                                 goto again;
4136                         }
4137                         if (ret)
4138                                 err = 1;
4139                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4140                                 btrfs_free_fs_root(tmp_root);
4141                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4142                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4143                         process_root_ref(leaf, path.slots[0], &key,
4144                                          root_cache);
4145                 }
4146 next:
4147                 path.slots[0]++;
4148         }
4149 out:
4150         btrfs_release_path(&path);
4151         if (err)
4152                 free_extent_cache_tree(&wc.shared);
4153         if (!cache_tree_empty(&wc.shared))
4154                 fprintf(stderr, "warning line %d\n", __LINE__);
4155
4156         task_stop(ctx.info);
4157
4158         return err;
4159 }
4160
4161 /*
4162  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4163  * INODE_REF/INODE_EXTREF match.
4164  *
4165  * @root:       the root of the fs/file tree
4166  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4167  * @key:        the key of the DIR_ITEM/DIR_INDEX
4168  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4169  *              distinguish root_dir between normal dir/file
4170  * @name:       the name in the INODE_REF/INODE_EXTREF
4171  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4172  * @mode:       the st_mode of INODE_ITEM
4173  *
4174  * Return 0 if no error occurred.
4175  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4176  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4177  * dir/file.
4178  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4179  * not match for normal dir/file.
4180  */
4181 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4182                          struct btrfs_key *key, u64 index, char *name,
4183                          u32 namelen, u32 mode)
4184 {
4185         struct btrfs_path path;
4186         struct extent_buffer *node;
4187         struct btrfs_dir_item *di;
4188         struct btrfs_key location;
4189         char namebuf[BTRFS_NAME_LEN] = {0};
4190         u32 total;
4191         u32 cur = 0;
4192         u32 len;
4193         u32 name_len;
4194         u32 data_len;
4195         u8 filetype;
4196         int slot;
4197         int ret;
4198
4199         btrfs_init_path(&path);
4200         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4201         if (ret < 0) {
4202                 ret = DIR_ITEM_MISSING;
4203                 goto out;
4204         }
4205
4206         /* Process root dir and goto out*/
4207         if (index == 0) {
4208                 if (ret == 0) {
4209                         ret = ROOT_DIR_ERROR;
4210                         error(
4211                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4212                                 root->objectid,
4213                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4214                                         "REF" : "EXTREF",
4215                                 ref_key->objectid, ref_key->offset,
4216                                 key->type == BTRFS_DIR_ITEM_KEY ?
4217                                         "DIR_ITEM" : "DIR_INDEX");
4218                 } else {
4219                         ret = 0;
4220                 }
4221
4222                 goto out;
4223         }
4224
4225         /* Process normal file/dir */
4226         if (ret > 0) {
4227                 ret = DIR_ITEM_MISSING;
4228                 error(
4229                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4230                         root->objectid,
4231                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4232                         ref_key->objectid, ref_key->offset,
4233                         key->type == BTRFS_DIR_ITEM_KEY ?
4234                                 "DIR_ITEM" : "DIR_INDEX",
4235                         key->objectid, key->offset, namelen, name,
4236                         imode_to_type(mode));
4237                 goto out;
4238         }
4239
4240         /* Check whether inode_id/filetype/name match */
4241         node = path.nodes[0];
4242         slot = path.slots[0];
4243         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4244         total = btrfs_item_size_nr(node, slot);
4245         while (cur < total) {
4246                 ret = DIR_ITEM_MISMATCH;
4247                 name_len = btrfs_dir_name_len(node, di);
4248                 data_len = btrfs_dir_data_len(node, di);
4249
4250                 btrfs_dir_item_key_to_cpu(node, di, &location);
4251                 if (location.objectid != ref_key->objectid ||
4252                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4253                     location.offset != 0)
4254                         goto next;
4255
4256                 filetype = btrfs_dir_type(node, di);
4257                 if (imode_to_type(mode) != filetype)
4258                         goto next;
4259
4260                 if (cur + sizeof(*di) + name_len > total ||
4261                     name_len > BTRFS_NAME_LEN) {
4262                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4263                                 root->objectid,
4264                                 key->type == BTRFS_DIR_ITEM_KEY ?
4265                                 "DIR_ITEM" : "DIR_INDEX",
4266                                 key->objectid, key->offset, name_len);
4267
4268                         if (cur + sizeof(*di) > total)
4269                                 break;
4270                         len = min_t(u32, total - cur - sizeof(*di),
4271                                     BTRFS_NAME_LEN);
4272                 } else {
4273                         len = name_len;
4274                 }
4275
4276                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4277                 if (len != namelen || strncmp(namebuf, name, len))
4278                         goto next;
4279
4280                 ret = 0;
4281                 goto out;
4282 next:
4283                 len = sizeof(*di) + name_len + data_len;
4284                 di = (struct btrfs_dir_item *)((char *)di + len);
4285                 cur += len;
4286         }
4287         if (ret == DIR_ITEM_MISMATCH)
4288                 error(
4289                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4290                         root->objectid,
4291                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4292                         ref_key->objectid, ref_key->offset,
4293                         key->type == BTRFS_DIR_ITEM_KEY ?
4294                                 "DIR_ITEM" : "DIR_INDEX",
4295                         key->objectid, key->offset, namelen, name,
4296                         imode_to_type(mode));
4297 out:
4298         btrfs_release_path(&path);
4299         return ret;
4300 }
4301
4302 /*
4303  * Traverse the given INODE_REF and call find_dir_item() to find related
4304  * DIR_ITEM/DIR_INDEX.
4305  *
4306  * @root:       the root of the fs/file tree
4307  * @ref_key:    the key of the INODE_REF
4308  * @refs:       the count of INODE_REF
4309  * @mode:       the st_mode of INODE_ITEM
4310  *
4311  * Return 0 if no error occurred.
4312  */
4313 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4314                            struct extent_buffer *node, int slot, u64 *refs,
4315                            int mode)
4316 {
4317         struct btrfs_key key;
4318         struct btrfs_inode_ref *ref;
4319         char namebuf[BTRFS_NAME_LEN] = {0};
4320         u32 total;
4321         u32 cur = 0;
4322         u32 len;
4323         u32 name_len;
4324         u64 index;
4325         int ret, err = 0;
4326
4327         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4328         total = btrfs_item_size_nr(node, slot);
4329
4330 next:
4331         /* Update inode ref count */
4332         (*refs)++;
4333
4334         index = btrfs_inode_ref_index(node, ref);
4335         name_len = btrfs_inode_ref_name_len(node, ref);
4336         if (cur + sizeof(*ref) + name_len > total ||
4337             name_len > BTRFS_NAME_LEN) {
4338                 warning("root %llu INODE_REF[%llu %llu] name too long",
4339                         root->objectid, ref_key->objectid, ref_key->offset);
4340
4341                 if (total < cur + sizeof(*ref))
4342                         goto out;
4343                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4344         } else {
4345                 len = name_len;
4346         }
4347
4348         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4349
4350         /* Check root dir ref name */
4351         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4352                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4353                       root->objectid, ref_key->objectid, ref_key->offset,
4354                       namebuf);
4355                 err |= ROOT_DIR_ERROR;
4356         }
4357
4358         /* Find related DIR_INDEX */
4359         key.objectid = ref_key->offset;
4360         key.type = BTRFS_DIR_INDEX_KEY;
4361         key.offset = index;
4362         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4363         err |= ret;
4364
4365         /* Find related dir_item */
4366         key.objectid = ref_key->offset;
4367         key.type = BTRFS_DIR_ITEM_KEY;
4368         key.offset = btrfs_name_hash(namebuf, len);
4369         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4370         err |= ret;
4371
4372         len = sizeof(*ref) + name_len;
4373         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4374         cur += len;
4375         if (cur < total)
4376                 goto next;
4377
4378 out:
4379         return err;
4380 }
4381
4382 /*
4383  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4384  * DIR_ITEM/DIR_INDEX.
4385  *
4386  * @root:       the root of the fs/file tree
4387  * @ref_key:    the key of the INODE_EXTREF
4388  * @refs:       the count of INODE_EXTREF
4389  * @mode:       the st_mode of INODE_ITEM
4390  *
4391  * Return 0 if no error occurred.
4392  */
4393 static int check_inode_extref(struct btrfs_root *root,
4394                               struct btrfs_key *ref_key,
4395                               struct extent_buffer *node, int slot, u64 *refs,
4396                               int mode)
4397 {
4398         struct btrfs_key key;
4399         struct btrfs_inode_extref *extref;
4400         char namebuf[BTRFS_NAME_LEN] = {0};
4401         u32 total;
4402         u32 cur = 0;
4403         u32 len;
4404         u32 name_len;
4405         u64 index;
4406         u64 parent;
4407         int ret;
4408         int err = 0;
4409
4410         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4411         total = btrfs_item_size_nr(node, slot);
4412
4413 next:
4414         /* update inode ref count */
4415         (*refs)++;
4416         name_len = btrfs_inode_extref_name_len(node, extref);
4417         index = btrfs_inode_extref_index(node, extref);
4418         parent = btrfs_inode_extref_parent(node, extref);
4419         if (name_len <= BTRFS_NAME_LEN) {
4420                 len = name_len;
4421         } else {
4422                 len = BTRFS_NAME_LEN;
4423                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4424                         root->objectid, ref_key->objectid, ref_key->offset);
4425         }
4426         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4427
4428         /* Check root dir ref name */
4429         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4430                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4431                       root->objectid, ref_key->objectid, ref_key->offset,
4432                       namebuf);
4433                 err |= ROOT_DIR_ERROR;
4434         }
4435
4436         /* find related dir_index */
4437         key.objectid = parent;
4438         key.type = BTRFS_DIR_INDEX_KEY;
4439         key.offset = index;
4440         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4441         err |= ret;
4442
4443         /* find related dir_item */
4444         key.objectid = parent;
4445         key.type = BTRFS_DIR_ITEM_KEY;
4446         key.offset = btrfs_name_hash(namebuf, len);
4447         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4448         err |= ret;
4449
4450         len = sizeof(*extref) + name_len;
4451         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4452         cur += len;
4453
4454         if (cur < total)
4455                 goto next;
4456
4457         return err;
4458 }
4459
4460 /*
4461  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4462  * DIR_ITEM/DIR_INDEX match.
4463  *
4464  * @root:       the root of the fs/file tree
4465  * @key:        the key of the INODE_REF/INODE_EXTREF
4466  * @name:       the name in the INODE_REF/INODE_EXTREF
4467  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4468  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4469  * to (u64)-1
4470  * @ext_ref:    the EXTENDED_IREF feature
4471  *
4472  * Return 0 if no error occurred.
4473  * Return >0 for error bitmap
4474  */
4475 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4476                           char *name, int namelen, u64 index,
4477                           unsigned int ext_ref)
4478 {
4479         struct btrfs_path path;
4480         struct btrfs_inode_ref *ref;
4481         struct btrfs_inode_extref *extref;
4482         struct extent_buffer *node;
4483         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4484         u32 total;
4485         u32 cur = 0;
4486         u32 len;
4487         u32 ref_namelen;
4488         u64 ref_index;
4489         u64 parent;
4490         u64 dir_id;
4491         int slot;
4492         int ret;
4493
4494         btrfs_init_path(&path);
4495         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4496         if (ret) {
4497                 ret = INODE_REF_MISSING;
4498                 goto extref;
4499         }
4500
4501         node = path.nodes[0];
4502         slot = path.slots[0];
4503
4504         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4505         total = btrfs_item_size_nr(node, slot);
4506
4507         /* Iterate all entry of INODE_REF */
4508         while (cur < total) {
4509                 ret = INODE_REF_MISSING;
4510
4511                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4512                 ref_index = btrfs_inode_ref_index(node, ref);
4513                 if (index != (u64)-1 && index != ref_index)
4514                         goto next_ref;
4515
4516                 if (cur + sizeof(*ref) + ref_namelen > total ||
4517                     ref_namelen > BTRFS_NAME_LEN) {
4518                         warning("root %llu INODE %s[%llu %llu] name too long",
4519                                 root->objectid,
4520                                 key->type == BTRFS_INODE_REF_KEY ?
4521                                         "REF" : "EXTREF",
4522                                 key->objectid, key->offset);
4523
4524                         if (cur + sizeof(*ref) > total)
4525                                 break;
4526                         len = min_t(u32, total - cur - sizeof(*ref),
4527                                     BTRFS_NAME_LEN);
4528                 } else {
4529                         len = ref_namelen;
4530                 }
4531
4532                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4533                                    len);
4534
4535                 if (len != namelen || strncmp(ref_namebuf, name, len))
4536                         goto next_ref;
4537
4538                 ret = 0;
4539                 goto out;
4540 next_ref:
4541                 len = sizeof(*ref) + ref_namelen;
4542                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4543                 cur += len;
4544         }
4545
4546 extref:
4547         /* Skip if not support EXTENDED_IREF feature */
4548         if (!ext_ref)
4549                 goto out;
4550
4551         btrfs_release_path(&path);
4552         btrfs_init_path(&path);
4553
4554         dir_id = key->offset;
4555         key->type = BTRFS_INODE_EXTREF_KEY;
4556         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4557
4558         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4559         if (ret) {
4560                 ret = INODE_REF_MISSING;
4561                 goto out;
4562         }
4563
4564         node = path.nodes[0];
4565         slot = path.slots[0];
4566
4567         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4568         cur = 0;
4569         total = btrfs_item_size_nr(node, slot);
4570
4571         /* Iterate all entry of INODE_EXTREF */
4572         while (cur < total) {
4573                 ret = INODE_REF_MISSING;
4574
4575                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4576                 ref_index = btrfs_inode_extref_index(node, extref);
4577                 parent = btrfs_inode_extref_parent(node, extref);
4578                 if (index != (u64)-1 && index != ref_index)
4579                         goto next_extref;
4580
4581                 if (parent != dir_id)
4582                         goto next_extref;
4583
4584                 if (ref_namelen <= BTRFS_NAME_LEN) {
4585                         len = ref_namelen;
4586                 } else {
4587                         len = BTRFS_NAME_LEN;
4588                         warning("root %llu INODE %s[%llu %llu] name too long",
4589                                 root->objectid,
4590                                 key->type == BTRFS_INODE_REF_KEY ?
4591                                         "REF" : "EXTREF",
4592                                 key->objectid, key->offset);
4593                 }
4594                 read_extent_buffer(node, ref_namebuf,
4595                                    (unsigned long)(extref + 1), len);
4596
4597                 if (len != namelen || strncmp(ref_namebuf, name, len))
4598                         goto next_extref;
4599
4600                 ret = 0;
4601                 goto out;
4602
4603 next_extref:
4604                 len = sizeof(*extref) + ref_namelen;
4605                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4606                 cur += len;
4607
4608         }
4609 out:
4610         btrfs_release_path(&path);
4611         return ret;
4612 }
4613
4614 /*
4615  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4616  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4617  *
4618  * @root:       the root of the fs/file tree
4619  * @key:        the key of the INODE_REF/INODE_EXTREF
4620  * @size:       the st_size of the INODE_ITEM
4621  * @ext_ref:    the EXTENDED_IREF feature
4622  *
4623  * Return 0 if no error occurred.
4624  */
4625 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4626                           struct extent_buffer *node, int slot, u64 *size,
4627                           unsigned int ext_ref)
4628 {
4629         struct btrfs_dir_item *di;
4630         struct btrfs_inode_item *ii;
4631         struct btrfs_path path;
4632         struct btrfs_key location;
4633         char namebuf[BTRFS_NAME_LEN] = {0};
4634         u32 total;
4635         u32 cur = 0;
4636         u32 len;
4637         u32 name_len;
4638         u32 data_len;
4639         u8 filetype;
4640         u32 mode;
4641         u64 index;
4642         int ret;
4643         int err = 0;
4644
4645         /*
4646          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4647          * ignore index check.
4648          */
4649         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4650
4651         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4652         total = btrfs_item_size_nr(node, slot);
4653
4654         while (cur < total) {
4655                 data_len = btrfs_dir_data_len(node, di);
4656                 if (data_len)
4657                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4658                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4659                               "DIR_ITEM" : "DIR_INDEX",
4660                               key->objectid, key->offset, data_len);
4661
4662                 name_len = btrfs_dir_name_len(node, di);
4663                 if (cur + sizeof(*di) + name_len > total ||
4664                     name_len > BTRFS_NAME_LEN) {
4665                         warning("root %llu %s[%llu %llu] name too long",
4666                                 root->objectid,
4667                                 key->type == BTRFS_DIR_ITEM_KEY ?
4668                                 "DIR_ITEM" : "DIR_INDEX",
4669                                 key->objectid, key->offset);
4670
4671                         if (cur + sizeof(*di) > total)
4672                                 break;
4673                         len = min_t(u32, total - cur - sizeof(*di),
4674                                     BTRFS_NAME_LEN);
4675                 } else {
4676                         len = name_len;
4677                 }
4678                 (*size) += name_len;
4679
4680                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4681                 filetype = btrfs_dir_type(node, di);
4682
4683                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4684                     key->offset != btrfs_name_hash(namebuf, len)) {
4685                         err |= -EIO;
4686                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4687                                 root->objectid, key->objectid, key->offset,
4688                                 namebuf, len, filetype, key->offset,
4689                                 btrfs_name_hash(namebuf, len));
4690                 }
4691
4692                 btrfs_init_path(&path);
4693                 btrfs_dir_item_key_to_cpu(node, di, &location);
4694
4695                 /* Ignore related ROOT_ITEM check */
4696                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4697                         goto next;
4698
4699                 /* Check relative INODE_ITEM(existence/filetype) */
4700                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4701                 if (ret) {
4702                         err |= INODE_ITEM_MISSING;
4703                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4704                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4705                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4706                               key->offset, location.objectid, name_len,
4707                               namebuf, filetype);
4708                         goto next;
4709                 }
4710
4711                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4712                                     struct btrfs_inode_item);
4713                 mode = btrfs_inode_mode(path.nodes[0], ii);
4714
4715                 if (imode_to_type(mode) != filetype) {
4716                         err |= INODE_ITEM_MISMATCH;
4717                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4718                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4719                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4720                               key->offset, name_len, namebuf, filetype);
4721                 }
4722
4723                 /* Check relative INODE_REF/INODE_EXTREF */
4724                 location.type = BTRFS_INODE_REF_KEY;
4725                 location.offset = key->objectid;
4726                 ret = find_inode_ref(root, &location, namebuf, len,
4727                                        index, ext_ref);
4728                 err |= ret;
4729                 if (ret & INODE_REF_MISSING)
4730                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4731                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4732                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4733                               key->offset, name_len, namebuf, filetype);
4734
4735 next:
4736                 btrfs_release_path(&path);
4737                 len = sizeof(*di) + name_len + data_len;
4738                 di = (struct btrfs_dir_item *)((char *)di + len);
4739                 cur += len;
4740
4741                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4742                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4743                               root->objectid, key->objectid, key->offset);
4744                         break;
4745                 }
4746         }
4747
4748         return err;
4749 }
4750
4751 /*
4752  * Check file extent datasum/hole, update the size of the file extents,
4753  * check and update the last offset of the file extent.
4754  *
4755  * @root:       the root of fs/file tree.
4756  * @fkey:       the key of the file extent.
4757  * @nodatasum:  INODE_NODATASUM feature.
4758  * @size:       the sum of all EXTENT_DATA items size for this inode.
4759  * @end:        the offset of the last extent.
4760  *
4761  * Return 0 if no error occurred.
4762  */
4763 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4764                              struct extent_buffer *node, int slot,
4765                              unsigned int nodatasum, u64 *size, u64 *end)
4766 {
4767         struct btrfs_file_extent_item *fi;
4768         u64 disk_bytenr;
4769         u64 disk_num_bytes;
4770         u64 extent_num_bytes;
4771         u64 extent_offset;
4772         u64 csum_found;         /* In byte size, sectorsize aligned */
4773         u64 search_start;       /* Logical range start we search for csum */
4774         u64 search_len;         /* Logical range len we search for csum */
4775         unsigned int extent_type;
4776         unsigned int is_hole;
4777         int compressed = 0;
4778         int ret;
4779         int err = 0;
4780
4781         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4782
4783         /* Check inline extent */
4784         extent_type = btrfs_file_extent_type(node, fi);
4785         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4786                 struct btrfs_item *e = btrfs_item_nr(slot);
4787                 u32 item_inline_len;
4788
4789                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4790                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4791                 compressed = btrfs_file_extent_compression(node, fi);
4792                 if (extent_num_bytes == 0) {
4793                         error(
4794                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4795                                 root->objectid, fkey->objectid, fkey->offset);
4796                         err |= FILE_EXTENT_ERROR;
4797                 }
4798                 if (!compressed && extent_num_bytes != item_inline_len) {
4799                         error(
4800                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4801                                 root->objectid, fkey->objectid, fkey->offset,
4802                                 extent_num_bytes, item_inline_len);
4803                         err |= FILE_EXTENT_ERROR;
4804                 }
4805                 *end += extent_num_bytes;
4806                 *size += extent_num_bytes;
4807                 return err;
4808         }
4809
4810         /* Check extent type */
4811         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4812                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4813                 err |= FILE_EXTENT_ERROR;
4814                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4815                       root->objectid, fkey->objectid, fkey->offset);
4816                 return err;
4817         }
4818
4819         /* Check REG_EXTENT/PREALLOC_EXTENT */
4820         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4821         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4822         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4823         extent_offset = btrfs_file_extent_offset(node, fi);
4824         compressed = btrfs_file_extent_compression(node, fi);
4825         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4826
4827         /*
4828          * Check EXTENT_DATA csum
4829          *
4830          * For plain (uncompressed) extent, we should only check the range
4831          * we're referring to, as it's possible that part of prealloc extent
4832          * has been written, and has csum:
4833          *
4834          * |<--- Original large preallocated extent A ---->|
4835          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4836          *      No csum                         Has csum
4837          *
4838          * For compressed extent, we should check the whole range.
4839          */
4840         if (!compressed) {
4841                 search_start = disk_bytenr + extent_offset;
4842                 search_len = extent_num_bytes;
4843         } else {
4844                 search_start = disk_bytenr;
4845                 search_len = disk_num_bytes;
4846         }
4847         ret = count_csum_range(root, search_start, search_len, &csum_found);
4848         if (csum_found > 0 && nodatasum) {
4849                 err |= ODD_CSUM_ITEM;
4850                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4851                       root->objectid, fkey->objectid, fkey->offset);
4852         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4853                    !is_hole && (ret < 0 || csum_found < search_len)) {
4854                 err |= CSUM_ITEM_MISSING;
4855                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4856                       root->objectid, fkey->objectid, fkey->offset,
4857                       csum_found, search_len);
4858         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4859                 err |= ODD_CSUM_ITEM;
4860                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4861                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4862         }
4863
4864         /* Check EXTENT_DATA hole */
4865         if (!no_holes && *end != fkey->offset) {
4866                 err |= FILE_EXTENT_ERROR;
4867                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4868                       root->objectid, fkey->objectid, fkey->offset);
4869         }
4870
4871         *end += extent_num_bytes;
4872         if (!is_hole)
4873                 *size += extent_num_bytes;
4874
4875         return err;
4876 }
4877
4878 /*
4879  * Check INODE_ITEM and related ITEMs (the same inode number)
4880  * 1. check link count
4881  * 2. check inode ref/extref
4882  * 3. check dir item/index
4883  *
4884  * @ext_ref:    the EXTENDED_IREF feature
4885  *
4886  * Return 0 if no error occurred.
4887  * Return >0 for error or hit the traversal is done(by error bitmap)
4888  */
4889 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4890                             unsigned int ext_ref)
4891 {
4892         struct extent_buffer *node;
4893         struct btrfs_inode_item *ii;
4894         struct btrfs_key key;
4895         u64 inode_id;
4896         u32 mode;
4897         u64 nlink;
4898         u64 nbytes;
4899         u64 isize;
4900         u64 size = 0;
4901         u64 refs = 0;
4902         u64 extent_end = 0;
4903         u64 extent_size = 0;
4904         unsigned int dir;
4905         unsigned int nodatasum;
4906         int slot;
4907         int ret;
4908         int err = 0;
4909
4910         node = path->nodes[0];
4911         slot = path->slots[0];
4912
4913         btrfs_item_key_to_cpu(node, &key, slot);
4914         inode_id = key.objectid;
4915
4916         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4917                 ret = btrfs_next_item(root, path);
4918                 if (ret > 0)
4919                         err |= LAST_ITEM;
4920                 return err;
4921         }
4922
4923         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4924         isize = btrfs_inode_size(node, ii);
4925         nbytes = btrfs_inode_nbytes(node, ii);
4926         mode = btrfs_inode_mode(node, ii);
4927         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4928         nlink = btrfs_inode_nlink(node, ii);
4929         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4930
4931         while (1) {
4932                 ret = btrfs_next_item(root, path);
4933                 if (ret < 0) {
4934                         /* out will fill 'err' rusing current statistics */
4935                         goto out;
4936                 } else if (ret > 0) {
4937                         err |= LAST_ITEM;
4938                         goto out;
4939                 }
4940
4941                 node = path->nodes[0];
4942                 slot = path->slots[0];
4943                 btrfs_item_key_to_cpu(node, &key, slot);
4944                 if (key.objectid != inode_id)
4945                         goto out;
4946
4947                 switch (key.type) {
4948                 case BTRFS_INODE_REF_KEY:
4949                         ret = check_inode_ref(root, &key, node, slot, &refs,
4950                                               mode);
4951                         err |= ret;
4952                         break;
4953                 case BTRFS_INODE_EXTREF_KEY:
4954                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4955                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4956                                         root->objectid, key.objectid,
4957                                         key.offset);
4958                         ret = check_inode_extref(root, &key, node, slot, &refs,
4959                                                  mode);
4960                         err |= ret;
4961                         break;
4962                 case BTRFS_DIR_ITEM_KEY:
4963                 case BTRFS_DIR_INDEX_KEY:
4964                         if (!dir) {
4965                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4966                                         root->objectid, inode_id,
4967                                         imode_to_type(mode), key.objectid,
4968                                         key.offset);
4969                         }
4970                         ret = check_dir_item(root, &key, node, slot, &size,
4971                                              ext_ref);
4972                         err |= ret;
4973                         break;
4974                 case BTRFS_EXTENT_DATA_KEY:
4975                         if (dir) {
4976                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4977                                         root->objectid, inode_id, key.objectid,
4978                                         key.offset);
4979                         }
4980                         ret = check_file_extent(root, &key, node, slot,
4981                                                 nodatasum, &extent_size,
4982                                                 &extent_end);
4983                         err |= ret;
4984                         break;
4985                 case BTRFS_XATTR_ITEM_KEY:
4986                         break;
4987                 default:
4988                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4989                               key.objectid, key.type, key.offset);
4990                 }
4991         }
4992
4993 out:
4994         /* verify INODE_ITEM nlink/isize/nbytes */
4995         if (dir) {
4996                 if (nlink != 1) {
4997                         err |= LINK_COUNT_ERROR;
4998                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4999                               root->objectid, inode_id, nlink);
5000                 }
5001
5002                 /*
5003                  * Just a warning, as dir inode nbytes is just an
5004                  * instructive value.
5005                  */
5006                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5007                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5008                                 root->objectid, inode_id,
5009                                 root->fs_info->nodesize);
5010                 }
5011
5012                 if (isize != size) {
5013                         err |= ISIZE_ERROR;
5014                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5015                               root->objectid, inode_id, isize, size);
5016                 }
5017         } else {
5018                 if (nlink != refs) {
5019                         err |= LINK_COUNT_ERROR;
5020                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5021                               root->objectid, inode_id, nlink, refs);
5022                 } else if (!nlink) {
5023                         err |= ORPHAN_ITEM;
5024                 }
5025
5026                 if (!nbytes && !no_holes && extent_end < isize) {
5027                         err |= NBYTES_ERROR;
5028                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5029                               root->objectid, inode_id, isize);
5030                 }
5031
5032                 if (nbytes != extent_size) {
5033                         err |= NBYTES_ERROR;
5034                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5035                               root->objectid, inode_id, nbytes, extent_size);
5036                 }
5037         }
5038
5039         return err;
5040 }
5041
5042 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5043 {
5044         struct btrfs_path path;
5045         struct btrfs_key key;
5046         int err = 0;
5047         int ret;
5048
5049         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5050         key.type = BTRFS_INODE_ITEM_KEY;
5051         key.offset = 0;
5052
5053         /* For root being dropped, we don't need to check first inode */
5054         if (btrfs_root_refs(&root->root_item) == 0 &&
5055             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5056             key.objectid)
5057                 return 0;
5058
5059         btrfs_init_path(&path);
5060
5061         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5062         if (ret < 0)
5063                 goto out;
5064         if (ret > 0) {
5065                 ret = 0;
5066                 err |= INODE_ITEM_MISSING;
5067                 error("first inode item of root %llu is missing",
5068                       root->objectid);
5069         }
5070
5071         err |= check_inode_item(root, &path, ext_ref);
5072         err &= ~LAST_ITEM;
5073         if (err && !ret)
5074                 ret = -EIO;
5075 out:
5076         btrfs_release_path(&path);
5077         return ret;
5078 }
5079
5080 /*
5081  * Iterate all item on the tree and call check_inode_item() to check.
5082  *
5083  * @root:       the root of the tree to be checked.
5084  * @ext_ref:    the EXTENDED_IREF feature
5085  *
5086  * Return 0 if no error found.
5087  * Return <0 for error.
5088  */
5089 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5090 {
5091         struct btrfs_path path;
5092         struct node_refs nrefs;
5093         struct btrfs_root_item *root_item = &root->root_item;
5094         int ret;
5095         int level;
5096         int err = 0;
5097
5098         /*
5099          * We need to manually check the first inode item(256)
5100          * As the following traversal function will only start from
5101          * the first inode item in the leaf, if inode item(256) is missing
5102          * we will just skip it forever.
5103          */
5104         ret = check_fs_first_inode(root, ext_ref);
5105         if (ret < 0)
5106                 return ret;
5107
5108         memset(&nrefs, 0, sizeof(nrefs));
5109         level = btrfs_header_level(root->node);
5110         btrfs_init_path(&path);
5111
5112         if (btrfs_root_refs(root_item) > 0 ||
5113             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5114                 path.nodes[level] = root->node;
5115                 path.slots[level] = 0;
5116                 extent_buffer_get(root->node);
5117         } else {
5118                 struct btrfs_key key;
5119
5120                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5121                 level = root_item->drop_level;
5122                 path.lowest_level = level;
5123                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5124                 if (ret < 0)
5125                         goto out;
5126                 ret = 0;
5127         }
5128
5129         while (1) {
5130                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5131                 err |= !!ret;
5132
5133                 /* if ret is negative, walk shall stop */
5134                 if (ret < 0) {
5135                         ret = err;
5136                         break;
5137                 }
5138
5139                 ret = walk_up_tree_v2(root, &path, &level);
5140                 if (ret != 0) {
5141                         /* Normal exit, reset ret to err */
5142                         ret = err;
5143                         break;
5144                 }
5145         }
5146
5147 out:
5148         btrfs_release_path(&path);
5149         return ret;
5150 }
5151
5152 /*
5153  * Find the relative ref for root_ref and root_backref.
5154  *
5155  * @root:       the root of the root tree.
5156  * @ref_key:    the key of the root ref.
5157  *
5158  * Return 0 if no error occurred.
5159  */
5160 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5161                           struct extent_buffer *node, int slot)
5162 {
5163         struct btrfs_path path;
5164         struct btrfs_key key;
5165         struct btrfs_root_ref *ref;
5166         struct btrfs_root_ref *backref;
5167         char ref_name[BTRFS_NAME_LEN] = {0};
5168         char backref_name[BTRFS_NAME_LEN] = {0};
5169         u64 ref_dirid;
5170         u64 ref_seq;
5171         u32 ref_namelen;
5172         u64 backref_dirid;
5173         u64 backref_seq;
5174         u32 backref_namelen;
5175         u32 len;
5176         int ret;
5177         int err = 0;
5178
5179         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5180         ref_dirid = btrfs_root_ref_dirid(node, ref);
5181         ref_seq = btrfs_root_ref_sequence(node, ref);
5182         ref_namelen = btrfs_root_ref_name_len(node, ref);
5183
5184         if (ref_namelen <= BTRFS_NAME_LEN) {
5185                 len = ref_namelen;
5186         } else {
5187                 len = BTRFS_NAME_LEN;
5188                 warning("%s[%llu %llu] ref_name too long",
5189                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5190                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5191                         ref_key->offset);
5192         }
5193         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5194
5195         /* Find relative root_ref */
5196         key.objectid = ref_key->offset;
5197         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5198         key.offset = ref_key->objectid;
5199
5200         btrfs_init_path(&path);
5201         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5202         if (ret) {
5203                 err |= ROOT_REF_MISSING;
5204                 error("%s[%llu %llu] couldn't find relative ref",
5205                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5206                       "ROOT_REF" : "ROOT_BACKREF",
5207                       ref_key->objectid, ref_key->offset);
5208                 goto out;
5209         }
5210
5211         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5212                                  struct btrfs_root_ref);
5213         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5214         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5215         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5216
5217         if (backref_namelen <= BTRFS_NAME_LEN) {
5218                 len = backref_namelen;
5219         } else {
5220                 len = BTRFS_NAME_LEN;
5221                 warning("%s[%llu %llu] ref_name too long",
5222                         key.type == BTRFS_ROOT_REF_KEY ?
5223                         "ROOT_REF" : "ROOT_BACKREF",
5224                         key.objectid, key.offset);
5225         }
5226         read_extent_buffer(path.nodes[0], backref_name,
5227                            (unsigned long)(backref + 1), len);
5228
5229         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5230             ref_namelen != backref_namelen ||
5231             strncmp(ref_name, backref_name, len)) {
5232                 err |= ROOT_REF_MISMATCH;
5233                 error("%s[%llu %llu] mismatch relative ref",
5234                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5235                       "ROOT_REF" : "ROOT_BACKREF",
5236                       ref_key->objectid, ref_key->offset);
5237         }
5238 out:
5239         btrfs_release_path(&path);
5240         return err;
5241 }
5242
5243 /*
5244  * Check all fs/file tree in low_memory mode.
5245  *
5246  * 1. for fs tree root item, call check_fs_root_v2()
5247  * 2. for fs tree root ref/backref, call check_root_ref()
5248  *
5249  * Return 0 if no error occurred.
5250  */
5251 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5252 {
5253         struct btrfs_root *tree_root = fs_info->tree_root;
5254         struct btrfs_root *cur_root = NULL;
5255         struct btrfs_path path;
5256         struct btrfs_key key;
5257         struct extent_buffer *node;
5258         unsigned int ext_ref;
5259         int slot;
5260         int ret;
5261         int err = 0;
5262
5263         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5264
5265         btrfs_init_path(&path);
5266         key.objectid = BTRFS_FS_TREE_OBJECTID;
5267         key.offset = 0;
5268         key.type = BTRFS_ROOT_ITEM_KEY;
5269
5270         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5271         if (ret < 0) {
5272                 err = ret;
5273                 goto out;
5274         } else if (ret > 0) {
5275                 err = -ENOENT;
5276                 goto out;
5277         }
5278
5279         while (1) {
5280                 node = path.nodes[0];
5281                 slot = path.slots[0];
5282                 btrfs_item_key_to_cpu(node, &key, slot);
5283                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5284                         goto out;
5285                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5286                     fs_root_objectid(key.objectid)) {
5287                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5288                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5289                                                                        &key);
5290                         } else {
5291                                 key.offset = (u64)-1;
5292                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5293                         }
5294
5295                         if (IS_ERR(cur_root)) {
5296                                 error("Fail to read fs/subvol tree: %lld",
5297                                       key.objectid);
5298                                 err = -EIO;
5299                                 goto next;
5300                         }
5301
5302                         ret = check_fs_root_v2(cur_root, ext_ref);
5303                         err |= ret;
5304
5305                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5306                                 btrfs_free_fs_root(cur_root);
5307                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5308                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5309                         ret = check_root_ref(tree_root, &key, node, slot);
5310                         err |= ret;
5311                 }
5312 next:
5313                 ret = btrfs_next_item(tree_root, &path);
5314                 if (ret > 0)
5315                         goto out;
5316                 if (ret < 0) {
5317                         err = ret;
5318                         goto out;
5319                 }
5320         }
5321
5322 out:
5323         btrfs_release_path(&path);
5324         return err;
5325 }
5326
5327 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5328 {
5329         struct list_head *cur = rec->backrefs.next;
5330         struct extent_backref *back;
5331         struct tree_backref *tback;
5332         struct data_backref *dback;
5333         u64 found = 0;
5334         int err = 0;
5335
5336         while(cur != &rec->backrefs) {
5337                 back = to_extent_backref(cur);
5338                 cur = cur->next;
5339                 if (!back->found_extent_tree) {
5340                         err = 1;
5341                         if (!print_errs)
5342                                 goto out;
5343                         if (back->is_data) {
5344                                 dback = to_data_backref(back);
5345                                 fprintf(stderr, "Backref %llu %s %llu"
5346                                         " owner %llu offset %llu num_refs %lu"
5347                                         " not found in extent tree\n",
5348                                         (unsigned long long)rec->start,
5349                                         back->full_backref ?
5350                                         "parent" : "root",
5351                                         back->full_backref ?
5352                                         (unsigned long long)dback->parent:
5353                                         (unsigned long long)dback->root,
5354                                         (unsigned long long)dback->owner,
5355                                         (unsigned long long)dback->offset,
5356                                         (unsigned long)dback->num_refs);
5357                         } else {
5358                                 tback = to_tree_backref(back);
5359                                 fprintf(stderr, "Backref %llu parent %llu"
5360                                         " root %llu not found in extent tree\n",
5361                                         (unsigned long long)rec->start,
5362                                         (unsigned long long)tback->parent,
5363                                         (unsigned long long)tback->root);
5364                         }
5365                 }
5366                 if (!back->is_data && !back->found_ref) {
5367                         err = 1;
5368                         if (!print_errs)
5369                                 goto out;
5370                         tback = to_tree_backref(back);
5371                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5372                                 (unsigned long long)rec->start,
5373                                 back->full_backref ? "parent" : "root",
5374                                 back->full_backref ?
5375                                 (unsigned long long)tback->parent :
5376                                 (unsigned long long)tback->root, back);
5377                 }
5378                 if (back->is_data) {
5379                         dback = to_data_backref(back);
5380                         if (dback->found_ref != dback->num_refs) {
5381                                 err = 1;
5382                                 if (!print_errs)
5383                                         goto out;
5384                                 fprintf(stderr, "Incorrect local backref count"
5385                                         " on %llu %s %llu owner %llu"
5386                                         " offset %llu found %u wanted %u back %p\n",
5387                                         (unsigned long long)rec->start,
5388                                         back->full_backref ?
5389                                         "parent" : "root",
5390                                         back->full_backref ?
5391                                         (unsigned long long)dback->parent:
5392                                         (unsigned long long)dback->root,
5393                                         (unsigned long long)dback->owner,
5394                                         (unsigned long long)dback->offset,
5395                                         dback->found_ref, dback->num_refs, back);
5396                         }
5397                         if (dback->disk_bytenr != rec->start) {
5398                                 err = 1;
5399                                 if (!print_errs)
5400                                         goto out;
5401                                 fprintf(stderr, "Backref disk bytenr does not"
5402                                         " match extent record, bytenr=%llu, "
5403                                         "ref bytenr=%llu\n",
5404                                         (unsigned long long)rec->start,
5405                                         (unsigned long long)dback->disk_bytenr);
5406                         }
5407
5408                         if (dback->bytes != rec->nr) {
5409                                 err = 1;
5410                                 if (!print_errs)
5411                                         goto out;
5412                                 fprintf(stderr, "Backref bytes do not match "
5413                                         "extent backref, bytenr=%llu, ref "
5414                                         "bytes=%llu, backref bytes=%llu\n",
5415                                         (unsigned long long)rec->start,
5416                                         (unsigned long long)rec->nr,
5417                                         (unsigned long long)dback->bytes);
5418                         }
5419                 }
5420                 if (!back->is_data) {
5421                         found += 1;
5422                 } else {
5423                         dback = to_data_backref(back);
5424                         found += dback->found_ref;
5425                 }
5426         }
5427         if (found != rec->refs) {
5428                 err = 1;
5429                 if (!print_errs)
5430                         goto out;
5431                 fprintf(stderr, "Incorrect global backref count "
5432                         "on %llu found %llu wanted %llu\n",
5433                         (unsigned long long)rec->start,
5434                         (unsigned long long)found,
5435                         (unsigned long long)rec->refs);
5436         }
5437 out:
5438         return err;
5439 }
5440
5441 static int free_all_extent_backrefs(struct extent_record *rec)
5442 {
5443         struct extent_backref *back;
5444         struct list_head *cur;
5445         while (!list_empty(&rec->backrefs)) {
5446                 cur = rec->backrefs.next;
5447                 back = to_extent_backref(cur);
5448                 list_del(cur);
5449                 free(back);
5450         }
5451         return 0;
5452 }
5453
5454 static void free_extent_record_cache(struct cache_tree *extent_cache)
5455 {
5456         struct cache_extent *cache;
5457         struct extent_record *rec;
5458
5459         while (1) {
5460                 cache = first_cache_extent(extent_cache);
5461                 if (!cache)
5462                         break;
5463                 rec = container_of(cache, struct extent_record, cache);
5464                 remove_cache_extent(extent_cache, cache);
5465                 free_all_extent_backrefs(rec);
5466                 free(rec);
5467         }
5468 }
5469
5470 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5471                                  struct extent_record *rec)
5472 {
5473         if (rec->content_checked && rec->owner_ref_checked &&
5474             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5475             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5476             !rec->bad_full_backref && !rec->crossing_stripes &&
5477             !rec->wrong_chunk_type) {
5478                 remove_cache_extent(extent_cache, &rec->cache);
5479                 free_all_extent_backrefs(rec);
5480                 list_del_init(&rec->list);
5481                 free(rec);
5482         }
5483         return 0;
5484 }
5485
5486 static int check_owner_ref(struct btrfs_root *root,
5487                             struct extent_record *rec,
5488                             struct extent_buffer *buf)
5489 {
5490         struct extent_backref *node;
5491         struct tree_backref *back;
5492         struct btrfs_root *ref_root;
5493         struct btrfs_key key;
5494         struct btrfs_path path;
5495         struct extent_buffer *parent;
5496         int level;
5497         int found = 0;
5498         int ret;
5499
5500         list_for_each_entry(node, &rec->backrefs, list) {
5501                 if (node->is_data)
5502                         continue;
5503                 if (!node->found_ref)
5504                         continue;
5505                 if (node->full_backref)
5506                         continue;
5507                 back = to_tree_backref(node);
5508                 if (btrfs_header_owner(buf) == back->root)
5509                         return 0;
5510         }
5511         BUG_ON(rec->is_root);
5512
5513         /* try to find the block by search corresponding fs tree */
5514         key.objectid = btrfs_header_owner(buf);
5515         key.type = BTRFS_ROOT_ITEM_KEY;
5516         key.offset = (u64)-1;
5517
5518         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5519         if (IS_ERR(ref_root))
5520                 return 1;
5521
5522         level = btrfs_header_level(buf);
5523         if (level == 0)
5524                 btrfs_item_key_to_cpu(buf, &key, 0);
5525         else
5526                 btrfs_node_key_to_cpu(buf, &key, 0);
5527
5528         btrfs_init_path(&path);
5529         path.lowest_level = level + 1;
5530         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5531         if (ret < 0)
5532                 return 0;
5533
5534         parent = path.nodes[level + 1];
5535         if (parent && buf->start == btrfs_node_blockptr(parent,
5536                                                         path.slots[level + 1]))
5537                 found = 1;
5538
5539         btrfs_release_path(&path);
5540         return found ? 0 : 1;
5541 }
5542
5543 static int is_extent_tree_record(struct extent_record *rec)
5544 {
5545         struct list_head *cur = rec->backrefs.next;
5546         struct extent_backref *node;
5547         struct tree_backref *back;
5548         int is_extent = 0;
5549
5550         while(cur != &rec->backrefs) {
5551                 node = to_extent_backref(cur);
5552                 cur = cur->next;
5553                 if (node->is_data)
5554                         return 0;
5555                 back = to_tree_backref(node);
5556                 if (node->full_backref)
5557                         return 0;
5558                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5559                         is_extent = 1;
5560         }
5561         return is_extent;
5562 }
5563
5564
5565 static int record_bad_block_io(struct btrfs_fs_info *info,
5566                                struct cache_tree *extent_cache,
5567                                u64 start, u64 len)
5568 {
5569         struct extent_record *rec;
5570         struct cache_extent *cache;
5571         struct btrfs_key key;
5572
5573         cache = lookup_cache_extent(extent_cache, start, len);
5574         if (!cache)
5575                 return 0;
5576
5577         rec = container_of(cache, struct extent_record, cache);
5578         if (!is_extent_tree_record(rec))
5579                 return 0;
5580
5581         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5582         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5583 }
5584
5585 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5586                        struct extent_buffer *buf, int slot)
5587 {
5588         if (btrfs_header_level(buf)) {
5589                 struct btrfs_key_ptr ptr1, ptr2;
5590
5591                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5592                                    sizeof(struct btrfs_key_ptr));
5593                 read_extent_buffer(buf, &ptr2,
5594                                    btrfs_node_key_ptr_offset(slot + 1),
5595                                    sizeof(struct btrfs_key_ptr));
5596                 write_extent_buffer(buf, &ptr1,
5597                                     btrfs_node_key_ptr_offset(slot + 1),
5598                                     sizeof(struct btrfs_key_ptr));
5599                 write_extent_buffer(buf, &ptr2,
5600                                     btrfs_node_key_ptr_offset(slot),
5601                                     sizeof(struct btrfs_key_ptr));
5602                 if (slot == 0) {
5603                         struct btrfs_disk_key key;
5604                         btrfs_node_key(buf, &key, 0);
5605                         btrfs_fixup_low_keys(root, path, &key,
5606                                              btrfs_header_level(buf) + 1);
5607                 }
5608         } else {
5609                 struct btrfs_item *item1, *item2;
5610                 struct btrfs_key k1, k2;
5611                 char *item1_data, *item2_data;
5612                 u32 item1_offset, item2_offset, item1_size, item2_size;
5613
5614                 item1 = btrfs_item_nr(slot);
5615                 item2 = btrfs_item_nr(slot + 1);
5616                 btrfs_item_key_to_cpu(buf, &k1, slot);
5617                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5618                 item1_offset = btrfs_item_offset(buf, item1);
5619                 item2_offset = btrfs_item_offset(buf, item2);
5620                 item1_size = btrfs_item_size(buf, item1);
5621                 item2_size = btrfs_item_size(buf, item2);
5622
5623                 item1_data = malloc(item1_size);
5624                 if (!item1_data)
5625                         return -ENOMEM;
5626                 item2_data = malloc(item2_size);
5627                 if (!item2_data) {
5628                         free(item1_data);
5629                         return -ENOMEM;
5630                 }
5631
5632                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5633                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5634
5635                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5636                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5637                 free(item1_data);
5638                 free(item2_data);
5639
5640                 btrfs_set_item_offset(buf, item1, item2_offset);
5641                 btrfs_set_item_offset(buf, item2, item1_offset);
5642                 btrfs_set_item_size(buf, item1, item2_size);
5643                 btrfs_set_item_size(buf, item2, item1_size);
5644
5645                 path->slots[0] = slot;
5646                 btrfs_set_item_key_unsafe(root, path, &k2);
5647                 path->slots[0] = slot + 1;
5648                 btrfs_set_item_key_unsafe(root, path, &k1);
5649         }
5650         return 0;
5651 }
5652
5653 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5654 {
5655         struct extent_buffer *buf;
5656         struct btrfs_key k1, k2;
5657         int i;
5658         int level = path->lowest_level;
5659         int ret = -EIO;
5660
5661         buf = path->nodes[level];
5662         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5663                 if (level) {
5664                         btrfs_node_key_to_cpu(buf, &k1, i);
5665                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5666                 } else {
5667                         btrfs_item_key_to_cpu(buf, &k1, i);
5668                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5669                 }
5670                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5671                         continue;
5672                 ret = swap_values(root, path, buf, i);
5673                 if (ret)
5674                         break;
5675                 btrfs_mark_buffer_dirty(buf);
5676                 i = 0;
5677         }
5678         return ret;
5679 }
5680
5681 static int delete_bogus_item(struct btrfs_root *root,
5682                              struct btrfs_path *path,
5683                              struct extent_buffer *buf, int slot)
5684 {
5685         struct btrfs_key key;
5686         int nritems = btrfs_header_nritems(buf);
5687
5688         btrfs_item_key_to_cpu(buf, &key, slot);
5689
5690         /* These are all the keys we can deal with missing. */
5691         if (key.type != BTRFS_DIR_INDEX_KEY &&
5692             key.type != BTRFS_EXTENT_ITEM_KEY &&
5693             key.type != BTRFS_METADATA_ITEM_KEY &&
5694             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5695             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5696                 return -1;
5697
5698         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5699                (unsigned long long)key.objectid, key.type,
5700                (unsigned long long)key.offset, slot, buf->start);
5701         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5702                               btrfs_item_nr_offset(slot + 1),
5703                               sizeof(struct btrfs_item) *
5704                               (nritems - slot - 1));
5705         btrfs_set_header_nritems(buf, nritems - 1);
5706         if (slot == 0) {
5707                 struct btrfs_disk_key disk_key;
5708
5709                 btrfs_item_key(buf, &disk_key, 0);
5710                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5711         }
5712         btrfs_mark_buffer_dirty(buf);
5713         return 0;
5714 }
5715
5716 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5717 {
5718         struct extent_buffer *buf;
5719         int i;
5720         int ret = 0;
5721
5722         /* We should only get this for leaves */
5723         BUG_ON(path->lowest_level);
5724         buf = path->nodes[0];
5725 again:
5726         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5727                 unsigned int shift = 0, offset;
5728
5729                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5730                     BTRFS_LEAF_DATA_SIZE(root)) {
5731                         if (btrfs_item_end_nr(buf, i) >
5732                             BTRFS_LEAF_DATA_SIZE(root)) {
5733                                 ret = delete_bogus_item(root, path, buf, i);
5734                                 if (!ret)
5735                                         goto again;
5736                                 fprintf(stderr, "item is off the end of the "
5737                                         "leaf, can't fix\n");
5738                                 ret = -EIO;
5739                                 break;
5740                         }
5741                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5742                                 btrfs_item_end_nr(buf, i);
5743                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5744                            btrfs_item_offset_nr(buf, i - 1)) {
5745                         if (btrfs_item_end_nr(buf, i) >
5746                             btrfs_item_offset_nr(buf, i - 1)) {
5747                                 ret = delete_bogus_item(root, path, buf, i);
5748                                 if (!ret)
5749                                         goto again;
5750                                 fprintf(stderr, "items overlap, can't fix\n");
5751                                 ret = -EIO;
5752                                 break;
5753                         }
5754                         shift = btrfs_item_offset_nr(buf, i - 1) -
5755                                 btrfs_item_end_nr(buf, i);
5756                 }
5757                 if (!shift)
5758                         continue;
5759
5760                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5761                        i, shift, (unsigned long long)buf->start);
5762                 offset = btrfs_item_offset_nr(buf, i);
5763                 memmove_extent_buffer(buf,
5764                                       btrfs_leaf_data(buf) + offset + shift,
5765                                       btrfs_leaf_data(buf) + offset,
5766                                       btrfs_item_size_nr(buf, i));
5767                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5768                                       offset + shift);
5769                 btrfs_mark_buffer_dirty(buf);
5770         }
5771
5772         /*
5773          * We may have moved things, in which case we want to exit so we don't
5774          * write those changes out.  Once we have proper abort functionality in
5775          * progs this can be changed to something nicer.
5776          */
5777         BUG_ON(ret);
5778         return ret;
5779 }
5780
5781 /*
5782  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5783  * then just return -EIO.
5784  */
5785 static int try_to_fix_bad_block(struct btrfs_root *root,
5786                                 struct extent_buffer *buf,
5787                                 enum btrfs_tree_block_status status)
5788 {
5789         struct btrfs_trans_handle *trans;
5790         struct ulist *roots;
5791         struct ulist_node *node;
5792         struct btrfs_root *search_root;
5793         struct btrfs_path path;
5794         struct ulist_iterator iter;
5795         struct btrfs_key root_key, key;
5796         int ret;
5797
5798         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5799             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5800                 return -EIO;
5801
5802         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5803         if (ret)
5804                 return -EIO;
5805
5806         btrfs_init_path(&path);
5807         ULIST_ITER_INIT(&iter);
5808         while ((node = ulist_next(roots, &iter))) {
5809                 root_key.objectid = node->val;
5810                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5811                 root_key.offset = (u64)-1;
5812
5813                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5814                 if (IS_ERR(root)) {
5815                         ret = -EIO;
5816                         break;
5817                 }
5818
5819
5820                 trans = btrfs_start_transaction(search_root, 0);
5821                 if (IS_ERR(trans)) {
5822                         ret = PTR_ERR(trans);
5823                         break;
5824                 }
5825
5826                 path.lowest_level = btrfs_header_level(buf);
5827                 path.skip_check_block = 1;
5828                 if (path.lowest_level)
5829                         btrfs_node_key_to_cpu(buf, &key, 0);
5830                 else
5831                         btrfs_item_key_to_cpu(buf, &key, 0);
5832                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5833                 if (ret) {
5834                         ret = -EIO;
5835                         btrfs_commit_transaction(trans, search_root);
5836                         break;
5837                 }
5838                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5839                         ret = fix_key_order(search_root, &path);
5840                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5841                         ret = fix_item_offset(search_root, &path);
5842                 if (ret) {
5843                         btrfs_commit_transaction(trans, search_root);
5844                         break;
5845                 }
5846                 btrfs_release_path(&path);
5847                 btrfs_commit_transaction(trans, search_root);
5848         }
5849         ulist_free(roots);
5850         btrfs_release_path(&path);
5851         return ret;
5852 }
5853
5854 static int check_block(struct btrfs_root *root,
5855                        struct cache_tree *extent_cache,
5856                        struct extent_buffer *buf, u64 flags)
5857 {
5858         struct extent_record *rec;
5859         struct cache_extent *cache;
5860         struct btrfs_key key;
5861         enum btrfs_tree_block_status status;
5862         int ret = 0;
5863         int level;
5864
5865         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5866         if (!cache)
5867                 return 1;
5868         rec = container_of(cache, struct extent_record, cache);
5869         rec->generation = btrfs_header_generation(buf);
5870
5871         level = btrfs_header_level(buf);
5872         if (btrfs_header_nritems(buf) > 0) {
5873
5874                 if (level == 0)
5875                         btrfs_item_key_to_cpu(buf, &key, 0);
5876                 else
5877                         btrfs_node_key_to_cpu(buf, &key, 0);
5878
5879                 rec->info_objectid = key.objectid;
5880         }
5881         rec->info_level = level;
5882
5883         if (btrfs_is_leaf(buf))
5884                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5885         else
5886                 status = btrfs_check_node(root, &rec->parent_key, buf);
5887
5888         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5889                 if (repair)
5890                         status = try_to_fix_bad_block(root, buf, status);
5891                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5892                         ret = -EIO;
5893                         fprintf(stderr, "bad block %llu\n",
5894                                 (unsigned long long)buf->start);
5895                 } else {
5896                         /*
5897                          * Signal to callers we need to start the scan over
5898                          * again since we'll have cowed blocks.
5899                          */
5900                         ret = -EAGAIN;
5901                 }
5902         } else {
5903                 rec->content_checked = 1;
5904                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5905                         rec->owner_ref_checked = 1;
5906                 else {
5907                         ret = check_owner_ref(root, rec, buf);
5908                         if (!ret)
5909                                 rec->owner_ref_checked = 1;
5910                 }
5911         }
5912         if (!ret)
5913                 maybe_free_extent_rec(extent_cache, rec);
5914         return ret;
5915 }
5916
5917 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5918                                                 u64 parent, u64 root)
5919 {
5920         struct list_head *cur = rec->backrefs.next;
5921         struct extent_backref *node;
5922         struct tree_backref *back;
5923
5924         while(cur != &rec->backrefs) {
5925                 node = to_extent_backref(cur);
5926                 cur = cur->next;
5927                 if (node->is_data)
5928                         continue;
5929                 back = to_tree_backref(node);
5930                 if (parent > 0) {
5931                         if (!node->full_backref)
5932                                 continue;
5933                         if (parent == back->parent)
5934                                 return back;
5935                 } else {
5936                         if (node->full_backref)
5937                                 continue;
5938                         if (back->root == root)
5939                                 return back;
5940                 }
5941         }
5942         return NULL;
5943 }
5944
5945 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5946                                                 u64 parent, u64 root)
5947 {
5948         struct tree_backref *ref = malloc(sizeof(*ref));
5949
5950         if (!ref)
5951                 return NULL;
5952         memset(&ref->node, 0, sizeof(ref->node));
5953         if (parent > 0) {
5954                 ref->parent = parent;
5955                 ref->node.full_backref = 1;
5956         } else {
5957                 ref->root = root;
5958                 ref->node.full_backref = 0;
5959         }
5960         list_add_tail(&ref->node.list, &rec->backrefs);
5961
5962         return ref;
5963 }
5964
5965 static struct data_backref *find_data_backref(struct extent_record *rec,
5966                                                 u64 parent, u64 root,
5967                                                 u64 owner, u64 offset,
5968                                                 int found_ref,
5969                                                 u64 disk_bytenr, u64 bytes)
5970 {
5971         struct list_head *cur = rec->backrefs.next;
5972         struct extent_backref *node;
5973         struct data_backref *back;
5974
5975         while(cur != &rec->backrefs) {
5976                 node = to_extent_backref(cur);
5977                 cur = cur->next;
5978                 if (!node->is_data)
5979                         continue;
5980                 back = to_data_backref(node);
5981                 if (parent > 0) {
5982                         if (!node->full_backref)
5983                                 continue;
5984                         if (parent == back->parent)
5985                                 return back;
5986                 } else {
5987                         if (node->full_backref)
5988                                 continue;
5989                         if (back->root == root && back->owner == owner &&
5990                             back->offset == offset) {
5991                                 if (found_ref && node->found_ref &&
5992                                     (back->bytes != bytes ||
5993                                     back->disk_bytenr != disk_bytenr))
5994                                         continue;
5995                                 return back;
5996                         }
5997                 }
5998         }
5999         return NULL;
6000 }
6001
6002 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6003                                                 u64 parent, u64 root,
6004                                                 u64 owner, u64 offset,
6005                                                 u64 max_size)
6006 {
6007         struct data_backref *ref = malloc(sizeof(*ref));
6008
6009         if (!ref)
6010                 return NULL;
6011         memset(&ref->node, 0, sizeof(ref->node));
6012         ref->node.is_data = 1;
6013
6014         if (parent > 0) {
6015                 ref->parent = parent;
6016                 ref->owner = 0;
6017                 ref->offset = 0;
6018                 ref->node.full_backref = 1;
6019         } else {
6020                 ref->root = root;
6021                 ref->owner = owner;
6022                 ref->offset = offset;
6023                 ref->node.full_backref = 0;
6024         }
6025         ref->bytes = max_size;
6026         ref->found_ref = 0;
6027         ref->num_refs = 0;
6028         list_add_tail(&ref->node.list, &rec->backrefs);
6029         if (max_size > rec->max_size)
6030                 rec->max_size = max_size;
6031         return ref;
6032 }
6033
6034 /* Check if the type of extent matches with its chunk */
6035 static void check_extent_type(struct extent_record *rec)
6036 {
6037         struct btrfs_block_group_cache *bg_cache;
6038
6039         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6040         if (!bg_cache)
6041                 return;
6042
6043         /* data extent, check chunk directly*/
6044         if (!rec->metadata) {
6045                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6046                         rec->wrong_chunk_type = 1;
6047                 return;
6048         }
6049
6050         /* metadata extent, check the obvious case first */
6051         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6052                                  BTRFS_BLOCK_GROUP_METADATA))) {
6053                 rec->wrong_chunk_type = 1;
6054                 return;
6055         }
6056
6057         /*
6058          * Check SYSTEM extent, as it's also marked as metadata, we can only
6059          * make sure it's a SYSTEM extent by its backref
6060          */
6061         if (!list_empty(&rec->backrefs)) {
6062                 struct extent_backref *node;
6063                 struct tree_backref *tback;
6064                 u64 bg_type;
6065
6066                 node = to_extent_backref(rec->backrefs.next);
6067                 if (node->is_data) {
6068                         /* tree block shouldn't have data backref */
6069                         rec->wrong_chunk_type = 1;
6070                         return;
6071                 }
6072                 tback = container_of(node, struct tree_backref, node);
6073
6074                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6075                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6076                 else
6077                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6078                 if (!(bg_cache->flags & bg_type))
6079                         rec->wrong_chunk_type = 1;
6080         }
6081 }
6082
6083 /*
6084  * Allocate a new extent record, fill default values from @tmpl and insert int
6085  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6086  * the cache, otherwise it fails.
6087  */
6088 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6089                 struct extent_record *tmpl)
6090 {
6091         struct extent_record *rec;
6092         int ret = 0;
6093
6094         BUG_ON(tmpl->max_size == 0);
6095         rec = malloc(sizeof(*rec));
6096         if (!rec)
6097                 return -ENOMEM;
6098         rec->start = tmpl->start;
6099         rec->max_size = tmpl->max_size;
6100         rec->nr = max(tmpl->nr, tmpl->max_size);
6101         rec->found_rec = tmpl->found_rec;
6102         rec->content_checked = tmpl->content_checked;
6103         rec->owner_ref_checked = tmpl->owner_ref_checked;
6104         rec->num_duplicates = 0;
6105         rec->metadata = tmpl->metadata;
6106         rec->flag_block_full_backref = FLAG_UNSET;
6107         rec->bad_full_backref = 0;
6108         rec->crossing_stripes = 0;
6109         rec->wrong_chunk_type = 0;
6110         rec->is_root = tmpl->is_root;
6111         rec->refs = tmpl->refs;
6112         rec->extent_item_refs = tmpl->extent_item_refs;
6113         rec->parent_generation = tmpl->parent_generation;
6114         INIT_LIST_HEAD(&rec->backrefs);
6115         INIT_LIST_HEAD(&rec->dups);
6116         INIT_LIST_HEAD(&rec->list);
6117         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6118         rec->cache.start = tmpl->start;
6119         rec->cache.size = tmpl->nr;
6120         ret = insert_cache_extent(extent_cache, &rec->cache);
6121         if (ret) {
6122                 free(rec);
6123                 return ret;
6124         }
6125         bytes_used += rec->nr;
6126
6127         if (tmpl->metadata)
6128                 rec->crossing_stripes = check_crossing_stripes(global_info,
6129                                 rec->start, global_info->nodesize);
6130         check_extent_type(rec);
6131         return ret;
6132 }
6133
6134 /*
6135  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6136  * some are hints:
6137  * - refs              - if found, increase refs
6138  * - is_root           - if found, set
6139  * - content_checked   - if found, set
6140  * - owner_ref_checked - if found, set
6141  *
6142  * If not found, create a new one, initialize and insert.
6143  */
6144 static int add_extent_rec(struct cache_tree *extent_cache,
6145                 struct extent_record *tmpl)
6146 {
6147         struct extent_record *rec;
6148         struct cache_extent *cache;
6149         int ret = 0;
6150         int dup = 0;
6151
6152         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6153         if (cache) {
6154                 rec = container_of(cache, struct extent_record, cache);
6155                 if (tmpl->refs)
6156                         rec->refs++;
6157                 if (rec->nr == 1)
6158                         rec->nr = max(tmpl->nr, tmpl->max_size);
6159
6160                 /*
6161                  * We need to make sure to reset nr to whatever the extent
6162                  * record says was the real size, this way we can compare it to
6163                  * the backrefs.
6164                  */
6165                 if (tmpl->found_rec) {
6166                         if (tmpl->start != rec->start || rec->found_rec) {
6167                                 struct extent_record *tmp;
6168
6169                                 dup = 1;
6170                                 if (list_empty(&rec->list))
6171                                         list_add_tail(&rec->list,
6172                                                       &duplicate_extents);
6173
6174                                 /*
6175                                  * We have to do this song and dance in case we
6176                                  * find an extent record that falls inside of
6177                                  * our current extent record but does not have
6178                                  * the same objectid.
6179                                  */
6180                                 tmp = malloc(sizeof(*tmp));
6181                                 if (!tmp)
6182                                         return -ENOMEM;
6183                                 tmp->start = tmpl->start;
6184                                 tmp->max_size = tmpl->max_size;
6185                                 tmp->nr = tmpl->nr;
6186                                 tmp->found_rec = 1;
6187                                 tmp->metadata = tmpl->metadata;
6188                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6189                                 INIT_LIST_HEAD(&tmp->list);
6190                                 list_add_tail(&tmp->list, &rec->dups);
6191                                 rec->num_duplicates++;
6192                         } else {
6193                                 rec->nr = tmpl->nr;
6194                                 rec->found_rec = 1;
6195                         }
6196                 }
6197
6198                 if (tmpl->extent_item_refs && !dup) {
6199                         if (rec->extent_item_refs) {
6200                                 fprintf(stderr, "block %llu rec "
6201                                         "extent_item_refs %llu, passed %llu\n",
6202                                         (unsigned long long)tmpl->start,
6203                                         (unsigned long long)
6204                                                         rec->extent_item_refs,
6205                                         (unsigned long long)tmpl->extent_item_refs);
6206                         }
6207                         rec->extent_item_refs = tmpl->extent_item_refs;
6208                 }
6209                 if (tmpl->is_root)
6210                         rec->is_root = 1;
6211                 if (tmpl->content_checked)
6212                         rec->content_checked = 1;
6213                 if (tmpl->owner_ref_checked)
6214                         rec->owner_ref_checked = 1;
6215                 memcpy(&rec->parent_key, &tmpl->parent_key,
6216                                 sizeof(tmpl->parent_key));
6217                 if (tmpl->parent_generation)
6218                         rec->parent_generation = tmpl->parent_generation;
6219                 if (rec->max_size < tmpl->max_size)
6220                         rec->max_size = tmpl->max_size;
6221
6222                 /*
6223                  * A metadata extent can't cross stripe_len boundary, otherwise
6224                  * kernel scrub won't be able to handle it.
6225                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6226                  * it.
6227                  */
6228                 if (tmpl->metadata)
6229                         rec->crossing_stripes = check_crossing_stripes(
6230                                         global_info, rec->start,
6231                                         global_info->nodesize);
6232                 check_extent_type(rec);
6233                 maybe_free_extent_rec(extent_cache, rec);
6234                 return ret;
6235         }
6236
6237         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6238
6239         return ret;
6240 }
6241
6242 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6243                             u64 parent, u64 root, int found_ref)
6244 {
6245         struct extent_record *rec;
6246         struct tree_backref *back;
6247         struct cache_extent *cache;
6248         int ret;
6249
6250         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6251         if (!cache) {
6252                 struct extent_record tmpl;
6253
6254                 memset(&tmpl, 0, sizeof(tmpl));
6255                 tmpl.start = bytenr;
6256                 tmpl.nr = 1;
6257                 tmpl.metadata = 1;
6258                 tmpl.max_size = 1;
6259
6260                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6261                 if (ret)
6262                         return ret;
6263
6264                 /* really a bug in cache_extent implement now */
6265                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6266                 if (!cache)
6267                         return -ENOENT;
6268         }
6269
6270         rec = container_of(cache, struct extent_record, cache);
6271         if (rec->start != bytenr) {
6272                 /*
6273                  * Several cause, from unaligned bytenr to over lapping extents
6274                  */
6275                 return -EEXIST;
6276         }
6277
6278         back = find_tree_backref(rec, parent, root);
6279         if (!back) {
6280                 back = alloc_tree_backref(rec, parent, root);
6281                 if (!back)
6282                         return -ENOMEM;
6283         }
6284
6285         if (found_ref) {
6286                 if (back->node.found_ref) {
6287                         fprintf(stderr, "Extent back ref already exists "
6288                                 "for %llu parent %llu root %llu \n",
6289                                 (unsigned long long)bytenr,
6290                                 (unsigned long long)parent,
6291                                 (unsigned long long)root);
6292                 }
6293                 back->node.found_ref = 1;
6294         } else {
6295                 if (back->node.found_extent_tree) {
6296                         fprintf(stderr, "Extent back ref already exists "
6297                                 "for %llu parent %llu root %llu \n",
6298                                 (unsigned long long)bytenr,
6299                                 (unsigned long long)parent,
6300                                 (unsigned long long)root);
6301                 }
6302                 back->node.found_extent_tree = 1;
6303         }
6304         check_extent_type(rec);
6305         maybe_free_extent_rec(extent_cache, rec);
6306         return 0;
6307 }
6308
6309 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6310                             u64 parent, u64 root, u64 owner, u64 offset,
6311                             u32 num_refs, int found_ref, u64 max_size)
6312 {
6313         struct extent_record *rec;
6314         struct data_backref *back;
6315         struct cache_extent *cache;
6316         int ret;
6317
6318         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6319         if (!cache) {
6320                 struct extent_record tmpl;
6321
6322                 memset(&tmpl, 0, sizeof(tmpl));
6323                 tmpl.start = bytenr;
6324                 tmpl.nr = 1;
6325                 tmpl.max_size = max_size;
6326
6327                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6328                 if (ret)
6329                         return ret;
6330
6331                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6332                 if (!cache)
6333                         abort();
6334         }
6335
6336         rec = container_of(cache, struct extent_record, cache);
6337         if (rec->max_size < max_size)
6338                 rec->max_size = max_size;
6339
6340         /*
6341          * If found_ref is set then max_size is the real size and must match the
6342          * existing refs.  So if we have already found a ref then we need to
6343          * make sure that this ref matches the existing one, otherwise we need
6344          * to add a new backref so we can notice that the backrefs don't match
6345          * and we need to figure out who is telling the truth.  This is to
6346          * account for that awful fsync bug I introduced where we'd end up with
6347          * a btrfs_file_extent_item that would have its length include multiple
6348          * prealloc extents or point inside of a prealloc extent.
6349          */
6350         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6351                                  bytenr, max_size);
6352         if (!back) {
6353                 back = alloc_data_backref(rec, parent, root, owner, offset,
6354                                           max_size);
6355                 BUG_ON(!back);
6356         }
6357
6358         if (found_ref) {
6359                 BUG_ON(num_refs != 1);
6360                 if (back->node.found_ref)
6361                         BUG_ON(back->bytes != max_size);
6362                 back->node.found_ref = 1;
6363                 back->found_ref += 1;
6364                 back->bytes = max_size;
6365                 back->disk_bytenr = bytenr;
6366                 rec->refs += 1;
6367                 rec->content_checked = 1;
6368                 rec->owner_ref_checked = 1;
6369         } else {
6370                 if (back->node.found_extent_tree) {
6371                         fprintf(stderr, "Extent back ref already exists "
6372                                 "for %llu parent %llu root %llu "
6373                                 "owner %llu offset %llu num_refs %lu\n",
6374                                 (unsigned long long)bytenr,
6375                                 (unsigned long long)parent,
6376                                 (unsigned long long)root,
6377                                 (unsigned long long)owner,
6378                                 (unsigned long long)offset,
6379                                 (unsigned long)num_refs);
6380                 }
6381                 back->num_refs = num_refs;
6382                 back->node.found_extent_tree = 1;
6383         }
6384         maybe_free_extent_rec(extent_cache, rec);
6385         return 0;
6386 }
6387
6388 static int add_pending(struct cache_tree *pending,
6389                        struct cache_tree *seen, u64 bytenr, u32 size)
6390 {
6391         int ret;
6392         ret = add_cache_extent(seen, bytenr, size);
6393         if (ret)
6394                 return ret;
6395         add_cache_extent(pending, bytenr, size);
6396         return 0;
6397 }
6398
6399 static int pick_next_pending(struct cache_tree *pending,
6400                         struct cache_tree *reada,
6401                         struct cache_tree *nodes,
6402                         u64 last, struct block_info *bits, int bits_nr,
6403                         int *reada_bits)
6404 {
6405         unsigned long node_start = last;
6406         struct cache_extent *cache;
6407         int ret;
6408
6409         cache = search_cache_extent(reada, 0);
6410         if (cache) {
6411                 bits[0].start = cache->start;
6412                 bits[0].size = cache->size;
6413                 *reada_bits = 1;
6414                 return 1;
6415         }
6416         *reada_bits = 0;
6417         if (node_start > 32768)
6418                 node_start -= 32768;
6419
6420         cache = search_cache_extent(nodes, node_start);
6421         if (!cache)
6422                 cache = search_cache_extent(nodes, 0);
6423
6424         if (!cache) {
6425                  cache = search_cache_extent(pending, 0);
6426                  if (!cache)
6427                          return 0;
6428                  ret = 0;
6429                  do {
6430                          bits[ret].start = cache->start;
6431                          bits[ret].size = cache->size;
6432                          cache = next_cache_extent(cache);
6433                          ret++;
6434                  } while (cache && ret < bits_nr);
6435                  return ret;
6436         }
6437
6438         ret = 0;
6439         do {
6440                 bits[ret].start = cache->start;
6441                 bits[ret].size = cache->size;
6442                 cache = next_cache_extent(cache);
6443                 ret++;
6444         } while (cache && ret < bits_nr);
6445
6446         if (bits_nr - ret > 8) {
6447                 u64 lookup = bits[0].start + bits[0].size;
6448                 struct cache_extent *next;
6449                 next = search_cache_extent(pending, lookup);
6450                 while(next) {
6451                         if (next->start - lookup > 32768)
6452                                 break;
6453                         bits[ret].start = next->start;
6454                         bits[ret].size = next->size;
6455                         lookup = next->start + next->size;
6456                         ret++;
6457                         if (ret == bits_nr)
6458                                 break;
6459                         next = next_cache_extent(next);
6460                         if (!next)
6461                                 break;
6462                 }
6463         }
6464         return ret;
6465 }
6466
6467 static void free_chunk_record(struct cache_extent *cache)
6468 {
6469         struct chunk_record *rec;
6470
6471         rec = container_of(cache, struct chunk_record, cache);
6472         list_del_init(&rec->list);
6473         list_del_init(&rec->dextents);
6474         free(rec);
6475 }
6476
6477 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6478 {
6479         cache_tree_free_extents(chunk_cache, free_chunk_record);
6480 }
6481
6482 static void free_device_record(struct rb_node *node)
6483 {
6484         struct device_record *rec;
6485
6486         rec = container_of(node, struct device_record, node);
6487         free(rec);
6488 }
6489
6490 FREE_RB_BASED_TREE(device_cache, free_device_record);
6491
6492 int insert_block_group_record(struct block_group_tree *tree,
6493                               struct block_group_record *bg_rec)
6494 {
6495         int ret;
6496
6497         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6498         if (ret)
6499                 return ret;
6500
6501         list_add_tail(&bg_rec->list, &tree->block_groups);
6502         return 0;
6503 }
6504
6505 static void free_block_group_record(struct cache_extent *cache)
6506 {
6507         struct block_group_record *rec;
6508
6509         rec = container_of(cache, struct block_group_record, cache);
6510         list_del_init(&rec->list);
6511         free(rec);
6512 }
6513
6514 void free_block_group_tree(struct block_group_tree *tree)
6515 {
6516         cache_tree_free_extents(&tree->tree, free_block_group_record);
6517 }
6518
6519 int insert_device_extent_record(struct device_extent_tree *tree,
6520                                 struct device_extent_record *de_rec)
6521 {
6522         int ret;
6523
6524         /*
6525          * Device extent is a bit different from the other extents, because
6526          * the extents which belong to the different devices may have the
6527          * same start and size, so we need use the special extent cache
6528          * search/insert functions.
6529          */
6530         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6531         if (ret)
6532                 return ret;
6533
6534         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6535         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6536         return 0;
6537 }
6538
6539 static void free_device_extent_record(struct cache_extent *cache)
6540 {
6541         struct device_extent_record *rec;
6542
6543         rec = container_of(cache, struct device_extent_record, cache);
6544         if (!list_empty(&rec->chunk_list))
6545                 list_del_init(&rec->chunk_list);
6546         if (!list_empty(&rec->device_list))
6547                 list_del_init(&rec->device_list);
6548         free(rec);
6549 }
6550
6551 void free_device_extent_tree(struct device_extent_tree *tree)
6552 {
6553         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6554 }
6555
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf.
 *
 * The ref is recorded as a tree backref when its objectid is below
 * BTRFS_FIRST_FREE_OBJECTID and as a data backref otherwise. In both cases
 * key.objectid is passed as the extent bytenr and key.offset as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6576
6577 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6578                                             struct btrfs_key *key,
6579                                             int slot)
6580 {
6581         struct btrfs_chunk *ptr;
6582         struct chunk_record *rec;
6583         int num_stripes, i;
6584
6585         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6586         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6587
6588         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6589         if (!rec) {
6590                 fprintf(stderr, "memory allocation failed\n");
6591                 exit(-1);
6592         }
6593
6594         INIT_LIST_HEAD(&rec->list);
6595         INIT_LIST_HEAD(&rec->dextents);
6596         rec->bg_rec = NULL;
6597
6598         rec->cache.start = key->offset;
6599         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6600
6601         rec->generation = btrfs_header_generation(leaf);
6602
6603         rec->objectid = key->objectid;
6604         rec->type = key->type;
6605         rec->offset = key->offset;
6606
6607         rec->length = rec->cache.size;
6608         rec->owner = btrfs_chunk_owner(leaf, ptr);
6609         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6610         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6611         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6612         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6613         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6614         rec->num_stripes = num_stripes;
6615         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6616
6617         for (i = 0; i < rec->num_stripes; ++i) {
6618                 rec->stripes[i].devid =
6619                         btrfs_stripe_devid_nr(leaf, ptr, i);
6620                 rec->stripes[i].offset =
6621                         btrfs_stripe_offset_nr(leaf, ptr, i);
6622                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6623                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6624                                 BTRFS_UUID_SIZE);
6625         }
6626
6627         return rec;
6628 }
6629
6630 static int process_chunk_item(struct cache_tree *chunk_cache,
6631                               struct btrfs_key *key, struct extent_buffer *eb,
6632                               int slot)
6633 {
6634         struct chunk_record *rec;
6635         struct btrfs_chunk *chunk;
6636         int ret = 0;
6637
6638         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6639         /*
6640          * Do extra check for this chunk item,
6641          *
6642          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6643          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6644          * and owner<->key_type check.
6645          */
6646         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6647                                       key->offset);
6648         if (ret < 0) {
6649                 error("chunk(%llu, %llu) is not valid, ignore it",
6650                       key->offset, btrfs_chunk_length(eb, chunk));
6651                 return 0;
6652         }
6653         rec = btrfs_new_chunk_record(eb, key, slot);
6654         ret = insert_cache_extent(chunk_cache, &rec->cache);
6655         if (ret) {
6656                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6657                         rec->offset, rec->length);
6658                 free(rec);
6659         }
6660
6661         return ret;
6662 }
6663
6664 static int process_device_item(struct rb_root *dev_cache,
6665                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6666 {
6667         struct btrfs_dev_item *ptr;
6668         struct device_record *rec;
6669         int ret = 0;
6670
6671         ptr = btrfs_item_ptr(eb,
6672                 slot, struct btrfs_dev_item);
6673
6674         rec = malloc(sizeof(*rec));
6675         if (!rec) {
6676                 fprintf(stderr, "memory allocation failed\n");
6677                 return -ENOMEM;
6678         }
6679
6680         rec->devid = key->offset;
6681         rec->generation = btrfs_header_generation(eb);
6682
6683         rec->objectid = key->objectid;
6684         rec->type = key->type;
6685         rec->offset = key->offset;
6686
6687         rec->devid = btrfs_device_id(eb, ptr);
6688         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6689         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6690
6691         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6692         if (ret) {
6693                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6694                 free(rec);
6695         }
6696
6697         return ret;
6698 }
6699
6700 struct block_group_record *
6701 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6702                              int slot)
6703 {
6704         struct btrfs_block_group_item *ptr;
6705         struct block_group_record *rec;
6706
6707         rec = calloc(1, sizeof(*rec));
6708         if (!rec) {
6709                 fprintf(stderr, "memory allocation failed\n");
6710                 exit(-1);
6711         }
6712
6713         rec->cache.start = key->objectid;
6714         rec->cache.size = key->offset;
6715
6716         rec->generation = btrfs_header_generation(leaf);
6717
6718         rec->objectid = key->objectid;
6719         rec->type = key->type;
6720         rec->offset = key->offset;
6721
6722         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6723         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6724
6725         INIT_LIST_HEAD(&rec->list);
6726
6727         return rec;
6728 }
6729
6730 static int process_block_group_item(struct block_group_tree *block_group_cache,
6731                                     struct btrfs_key *key,
6732                                     struct extent_buffer *eb, int slot)
6733 {
6734         struct block_group_record *rec;
6735         int ret = 0;
6736
6737         rec = btrfs_new_block_group_record(eb, key, slot);
6738         ret = insert_block_group_record(block_group_cache, rec);
6739         if (ret) {
6740                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6741                         rec->objectid, rec->offset);
6742                 free(rec);
6743         }
6744
6745         return ret;
6746 }
6747
6748 struct device_extent_record *
6749 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6750                                struct btrfs_key *key, int slot)
6751 {
6752         struct device_extent_record *rec;
6753         struct btrfs_dev_extent *ptr;
6754
6755         rec = calloc(1, sizeof(*rec));
6756         if (!rec) {
6757                 fprintf(stderr, "memory allocation failed\n");
6758                 exit(-1);
6759         }
6760
6761         rec->cache.objectid = key->objectid;
6762         rec->cache.start = key->offset;
6763
6764         rec->generation = btrfs_header_generation(leaf);
6765
6766         rec->objectid = key->objectid;
6767         rec->type = key->type;
6768         rec->offset = key->offset;
6769
6770         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6771         rec->chunk_objecteid =
6772                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6773         rec->chunk_offset =
6774                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6775         rec->length = btrfs_dev_extent_length(leaf, ptr);
6776         rec->cache.size = rec->length;
6777
6778         INIT_LIST_HEAD(&rec->chunk_list);
6779         INIT_LIST_HEAD(&rec->device_list);
6780
6781         return rec;
6782 }
6783
6784 static int
6785 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6786                            struct btrfs_key *key, struct extent_buffer *eb,
6787                            int slot)
6788 {
6789         struct device_extent_record *rec;
6790         int ret;
6791
6792         rec = btrfs_new_device_extent_record(eb, key, slot);
6793         ret = insert_device_extent_record(dev_extent_cache, rec);
6794         if (ret) {
6795                 fprintf(stderr,
6796                         "Device extent[%llu, %llu, %llu] existed.\n",
6797                         rec->objectid, rec->offset, rec->length);
6798                 free(rec);
6799         }
6800
6801         return ret;
6802 }
6803
6804 static int process_extent_item(struct btrfs_root *root,
6805                                struct cache_tree *extent_cache,
6806                                struct extent_buffer *eb, int slot)
6807 {
6808         struct btrfs_extent_item *ei;
6809         struct btrfs_extent_inline_ref *iref;
6810         struct btrfs_extent_data_ref *dref;
6811         struct btrfs_shared_data_ref *sref;
6812         struct btrfs_key key;
6813         struct extent_record tmpl;
6814         unsigned long end;
6815         unsigned long ptr;
6816         int ret;
6817         int type;
6818         u32 item_size = btrfs_item_size_nr(eb, slot);
6819         u64 refs = 0;
6820         u64 offset;
6821         u64 num_bytes;
6822         int metadata = 0;
6823
6824         btrfs_item_key_to_cpu(eb, &key, slot);
6825
6826         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6827                 metadata = 1;
6828                 num_bytes = root->fs_info->nodesize;
6829         } else {
6830                 num_bytes = key.offset;
6831         }
6832
6833         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6834                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6835                       key.objectid, root->fs_info->sectorsize);
6836                 return -EIO;
6837         }
6838         if (item_size < sizeof(*ei)) {
6839 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6840                 struct btrfs_extent_item_v0 *ei0;
6841                 BUG_ON(item_size != sizeof(*ei0));
6842                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6843                 refs = btrfs_extent_refs_v0(eb, ei0);
6844 #else
6845                 BUG();
6846 #endif
6847                 memset(&tmpl, 0, sizeof(tmpl));
6848                 tmpl.start = key.objectid;
6849                 tmpl.nr = num_bytes;
6850                 tmpl.extent_item_refs = refs;
6851                 tmpl.metadata = metadata;
6852                 tmpl.found_rec = 1;
6853                 tmpl.max_size = num_bytes;
6854
6855                 return add_extent_rec(extent_cache, &tmpl);
6856         }
6857
6858         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6859         refs = btrfs_extent_refs(eb, ei);
6860         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6861                 metadata = 1;
6862         else
6863                 metadata = 0;
6864         if (metadata && num_bytes != root->fs_info->nodesize) {
6865                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6866                       num_bytes, root->fs_info->nodesize);
6867                 return -EIO;
6868         }
6869         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6870                 error("ignore invalid data extent, length %llu is not aligned to %u",
6871                       num_bytes, root->fs_info->sectorsize);
6872                 return -EIO;
6873         }
6874
6875         memset(&tmpl, 0, sizeof(tmpl));
6876         tmpl.start = key.objectid;
6877         tmpl.nr = num_bytes;
6878         tmpl.extent_item_refs = refs;
6879         tmpl.metadata = metadata;
6880         tmpl.found_rec = 1;
6881         tmpl.max_size = num_bytes;
6882         add_extent_rec(extent_cache, &tmpl);
6883
6884         ptr = (unsigned long)(ei + 1);
6885         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6886             key.type == BTRFS_EXTENT_ITEM_KEY)
6887                 ptr += sizeof(struct btrfs_tree_block_info);
6888
6889         end = (unsigned long)ei + item_size;
6890         while (ptr < end) {
6891                 iref = (struct btrfs_extent_inline_ref *)ptr;
6892                 type = btrfs_extent_inline_ref_type(eb, iref);
6893                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6894                 switch (type) {
6895                 case BTRFS_TREE_BLOCK_REF_KEY:
6896                         ret = add_tree_backref(extent_cache, key.objectid,
6897                                         0, offset, 0);
6898                         if (ret < 0)
6899                                 error(
6900                         "add_tree_backref failed (extent items tree block): %s",
6901                                       strerror(-ret));
6902                         break;
6903                 case BTRFS_SHARED_BLOCK_REF_KEY:
6904                         ret = add_tree_backref(extent_cache, key.objectid,
6905                                         offset, 0, 0);
6906                         if (ret < 0)
6907                                 error(
6908                         "add_tree_backref failed (extent items shared block): %s",
6909                                       strerror(-ret));
6910                         break;
6911                 case BTRFS_EXTENT_DATA_REF_KEY:
6912                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6913                         add_data_backref(extent_cache, key.objectid, 0,
6914                                         btrfs_extent_data_ref_root(eb, dref),
6915                                         btrfs_extent_data_ref_objectid(eb,
6916                                                                        dref),
6917                                         btrfs_extent_data_ref_offset(eb, dref),
6918                                         btrfs_extent_data_ref_count(eb, dref),
6919                                         0, num_bytes);
6920                         break;
6921                 case BTRFS_SHARED_DATA_REF_KEY:
6922                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6923                         add_data_backref(extent_cache, key.objectid, offset,
6924                                         0, 0, 0,
6925                                         btrfs_shared_data_ref_count(eb, sref),
6926                                         0, num_bytes);
6927                         break;
6928                 default:
6929                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6930                                 key.objectid, key.type, num_bytes);
6931                         goto out;
6932                 }
6933                 ptr += btrfs_extent_inline_ref_size(type);
6934         }
6935         WARN_ON(ptr > end);
6936 out:
6937         return 0;
6938 }
6939
6940 static int check_cache_range(struct btrfs_root *root,
6941                              struct btrfs_block_group_cache *cache,
6942                              u64 offset, u64 bytes)
6943 {
6944         struct btrfs_free_space *entry;
6945         u64 *logical;
6946         u64 bytenr;
6947         int stripe_len;
6948         int i, nr, ret;
6949
6950         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6951                 bytenr = btrfs_sb_offset(i);
6952                 ret = btrfs_rmap_block(root->fs_info,
6953                                        cache->key.objectid, bytenr, 0,
6954                                        &logical, &nr, &stripe_len);
6955                 if (ret)
6956                         return ret;
6957
6958                 while (nr--) {
6959                         if (logical[nr] + stripe_len <= offset)
6960                                 continue;
6961                         if (offset + bytes <= logical[nr])
6962                                 continue;
6963                         if (logical[nr] == offset) {
6964                                 if (stripe_len >= bytes) {
6965                                         free(logical);
6966                                         return 0;
6967                                 }
6968                                 bytes -= stripe_len;
6969                                 offset += stripe_len;
6970                         } else if (logical[nr] < offset) {
6971                                 if (logical[nr] + stripe_len >=
6972                                     offset + bytes) {
6973                                         free(logical);
6974                                         return 0;
6975                                 }
6976                                 bytes = (offset + bytes) -
6977                                         (logical[nr] + stripe_len);
6978                                 offset = logical[nr] + stripe_len;
6979                         } else {
6980                                 /*
6981                                  * Could be tricky, the super may land in the
6982                                  * middle of the area we're checking.  First
6983                                  * check the easiest case, it's at the end.
6984                                  */
6985                                 if (logical[nr] + stripe_len >=
6986                                     bytes + offset) {
6987                                         bytes = logical[nr] - offset;
6988                                         continue;
6989                                 }
6990
6991                                 /* Check the left side */
6992                                 ret = check_cache_range(root, cache,
6993                                                         offset,
6994                                                         logical[nr] - offset);
6995                                 if (ret) {
6996                                         free(logical);
6997                                         return ret;
6998                                 }
6999
7000                                 /* Now we continue with the right side */
7001                                 bytes = (offset + bytes) -
7002                                         (logical[nr] + stripe_len);
7003                                 offset = logical[nr] + stripe_len;
7004                         }
7005                 }
7006
7007                 free(logical);
7008         }
7009
7010         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7011         if (!entry) {
7012                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7013                         offset, offset+bytes);
7014                 return -EINVAL;
7015         }
7016
7017         if (entry->offset != offset) {
7018                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7019                         entry->offset);
7020                 return -EINVAL;
7021         }
7022
7023         if (entry->bytes != bytes) {
7024                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7025                         bytes, entry->bytes, offset);
7026                 return -EINVAL;
7027         }
7028
7029         unlink_free_space(cache->free_space_ctl, entry);
7030         free(entry);
7031         return 0;
7032 }
7033
7034 static int verify_space_cache(struct btrfs_root *root,
7035                               struct btrfs_block_group_cache *cache)
7036 {
7037         struct btrfs_path path;
7038         struct extent_buffer *leaf;
7039         struct btrfs_key key;
7040         u64 last;
7041         int ret = 0;
7042
7043         root = root->fs_info->extent_root;
7044
7045         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7046
7047         btrfs_init_path(&path);
7048         key.objectid = last;
7049         key.offset = 0;
7050         key.type = BTRFS_EXTENT_ITEM_KEY;
7051         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7052         if (ret < 0)
7053                 goto out;
7054         ret = 0;
7055         while (1) {
7056                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7057                         ret = btrfs_next_leaf(root, &path);
7058                         if (ret < 0)
7059                                 goto out;
7060                         if (ret > 0) {
7061                                 ret = 0;
7062                                 break;
7063                         }
7064                 }
7065                 leaf = path.nodes[0];
7066                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7067                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7068                         break;
7069                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7070                     key.type != BTRFS_METADATA_ITEM_KEY) {
7071                         path.slots[0]++;
7072                         continue;
7073                 }
7074
7075                 if (last == key.objectid) {
7076                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7077                                 last = key.objectid + key.offset;
7078                         else
7079                                 last = key.objectid + root->fs_info->nodesize;
7080                         path.slots[0]++;
7081                         continue;
7082                 }
7083
7084                 ret = check_cache_range(root, cache, last,
7085                                         key.objectid - last);
7086                 if (ret)
7087                         break;
7088                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7089                         last = key.objectid + key.offset;
7090                 else
7091                         last = key.objectid + root->fs_info->nodesize;
7092                 path.slots[0]++;
7093         }
7094
7095         if (last < cache->key.objectid + cache->key.offset)
7096                 ret = check_cache_range(root, cache, last,
7097                                         cache->key.objectid +
7098                                         cache->key.offset - last);
7099
7100 out:
7101         btrfs_release_path(&path);
7102
7103         if (!ret &&
7104             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7105                 fprintf(stderr, "There are still entries left in the space "
7106                         "cache\n");
7107                 ret = -EINVAL;
7108         }
7109
7110         return ret;
7111 }
7112
7113 static int check_space_cache(struct btrfs_root *root)
7114 {
7115         struct btrfs_block_group_cache *cache;
7116         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7117         int ret;
7118         int error = 0;
7119
7120         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7121             btrfs_super_generation(root->fs_info->super_copy) !=
7122             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7123                 printf("cache and super generation don't match, space cache "
7124                        "will be invalidated\n");
7125                 return 0;
7126         }
7127
7128         if (ctx.progress_enabled) {
7129                 ctx.tp = TASK_FREE_SPACE;
7130                 task_start(ctx.info);
7131         }
7132
7133         while (1) {
7134                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7135                 if (!cache)
7136                         break;
7137
7138                 start = cache->key.objectid + cache->key.offset;
7139                 if (!cache->free_space_ctl) {
7140                         if (btrfs_init_free_space_ctl(cache,
7141                                                 root->fs_info->sectorsize)) {
7142                                 ret = -ENOMEM;
7143                                 break;
7144                         }
7145                 } else {
7146                         btrfs_remove_free_space_cache(cache);
7147                 }
7148
7149                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7150                         ret = exclude_super_stripes(root, cache);
7151                         if (ret) {
7152                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7153                                         strerror(-ret));
7154                                 error++;
7155                                 continue;
7156                         }
7157                         ret = load_free_space_tree(root->fs_info, cache);
7158                         free_excluded_extents(root, cache);
7159                         if (ret < 0) {
7160                                 fprintf(stderr, "could not load free space tree: %s\n",
7161                                         strerror(-ret));
7162                                 error++;
7163                                 continue;
7164                         }
7165                         error += ret;
7166                 } else {
7167                         ret = load_free_space_cache(root->fs_info, cache);
7168                         if (!ret)
7169                                 continue;
7170                 }
7171
7172                 ret = verify_space_cache(root, cache);
7173                 if (ret) {
7174                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7175                                 cache->key.objectid);
7176                         error++;
7177                 }
7178         }
7179
7180         task_stop(ctx.info);
7181
7182         return error ? -EINVAL : 0;
7183 }
7184
7185 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7186                         u64 num_bytes, unsigned long leaf_offset,
7187                         struct extent_buffer *eb) {
7188
7189         struct btrfs_fs_info *fs_info = root->fs_info;
7190         u64 offset = 0;
7191         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7192         char *data;
7193         unsigned long csum_offset;
7194         u32 csum;
7195         u32 csum_expected;
7196         u64 read_len;
7197         u64 data_checked = 0;
7198         u64 tmp;
7199         int ret = 0;
7200         int mirror;
7201         int num_copies;
7202
7203         if (num_bytes % fs_info->sectorsize)
7204                 return -EINVAL;
7205
7206         data = malloc(num_bytes);
7207         if (!data)
7208                 return -ENOMEM;
7209
7210         while (offset < num_bytes) {
7211                 mirror = 0;
7212 again:
7213                 read_len = num_bytes - offset;
7214                 /* read as much space once a time */
7215                 ret = read_extent_data(fs_info, data + offset,
7216                                 bytenr + offset, &read_len, mirror);
7217                 if (ret)
7218                         goto out;
7219                 data_checked = 0;
7220                 /* verify every 4k data's checksum */
7221                 while (data_checked < read_len) {
7222                         csum = ~(u32)0;
7223                         tmp = offset + data_checked;
7224
7225                         csum = btrfs_csum_data((char *)data + tmp,
7226                                                csum, fs_info->sectorsize);
7227                         btrfs_csum_final(csum, (u8 *)&csum);
7228
7229                         csum_offset = leaf_offset +
7230                                  tmp / fs_info->sectorsize * csum_size;
7231                         read_extent_buffer(eb, (char *)&csum_expected,
7232                                            csum_offset, csum_size);
7233                         /* try another mirror */
7234                         if (csum != csum_expected) {
7235                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7236                                                 mirror, bytenr + tmp,
7237                                                 csum, csum_expected);
7238                                 num_copies = btrfs_num_copies(root->fs_info,
7239                                                 bytenr, num_bytes);
7240                                 if (mirror < num_copies - 1) {
7241                                         mirror += 1;
7242                                         goto again;
7243                                 }
7244                         }
7245                         data_checked += fs_info->sectorsize;
7246                 }
7247                 offset += read_len;
7248         }
7249 out:
7250         free(data);
7251         return ret;
7252 }
7253
7254 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7255                                u64 num_bytes)
7256 {
7257         struct btrfs_path path;
7258         struct extent_buffer *leaf;
7259         struct btrfs_key key;
7260         int ret;
7261
7262         btrfs_init_path(&path);
7263         key.objectid = bytenr;
7264         key.type = BTRFS_EXTENT_ITEM_KEY;
7265         key.offset = (u64)-1;
7266
7267 again:
7268         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7269                                 0, 0);
7270         if (ret < 0) {
7271                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7272                 btrfs_release_path(&path);
7273                 return ret;
7274         } else if (ret) {
7275                 if (path.slots[0] > 0) {
7276                         path.slots[0]--;
7277                 } else {
7278                         ret = btrfs_prev_leaf(root, &path);
7279                         if (ret < 0) {
7280                                 goto out;
7281                         } else if (ret > 0) {
7282                                 ret = 0;
7283                                 goto out;
7284                         }
7285                 }
7286         }
7287
7288         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7289
7290         /*
7291          * Block group items come before extent items if they have the same
7292          * bytenr, so walk back one more just in case.  Dear future traveller,
7293          * first congrats on mastering time travel.  Now if it's not too much
7294          * trouble could you go back to 2006 and tell Chris to make the
7295          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7296          * EXTENT_ITEM_KEY please?
7297          */
7298         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7299                 if (path.slots[0] > 0) {
7300                         path.slots[0]--;
7301                 } else {
7302                         ret = btrfs_prev_leaf(root, &path);
7303                         if (ret < 0) {
7304                                 goto out;
7305                         } else if (ret > 0) {
7306                                 ret = 0;
7307                                 goto out;
7308                         }
7309                 }
7310                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7311         }
7312
7313         while (num_bytes) {
7314                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7315                         ret = btrfs_next_leaf(root, &path);
7316                         if (ret < 0) {
7317                                 fprintf(stderr, "Error going to next leaf "
7318                                         "%d\n", ret);
7319                                 btrfs_release_path(&path);
7320                                 return ret;
7321                         } else if (ret) {
7322                                 break;
7323                         }
7324                 }
7325                 leaf = path.nodes[0];
7326                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7327                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7328                         path.slots[0]++;
7329                         continue;
7330                 }
7331                 if (key.objectid + key.offset < bytenr) {
7332                         path.slots[0]++;
7333                         continue;
7334                 }
7335                 if (key.objectid > bytenr + num_bytes)
7336                         break;
7337
7338                 if (key.objectid == bytenr) {
7339                         if (key.offset >= num_bytes) {
7340                                 num_bytes = 0;
7341                                 break;
7342                         }
7343                         num_bytes -= key.offset;
7344                         bytenr += key.offset;
7345                 } else if (key.objectid < bytenr) {
7346                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7347                                 num_bytes = 0;
7348                                 break;
7349                         }
7350                         num_bytes = (bytenr + num_bytes) -
7351                                 (key.objectid + key.offset);
7352                         bytenr = key.objectid + key.offset;
7353                 } else {
7354                         if (key.objectid + key.offset < bytenr + num_bytes) {
7355                                 u64 new_start = key.objectid + key.offset;
7356                                 u64 new_bytes = bytenr + num_bytes - new_start;
7357
7358                                 /*
7359                                  * Weird case, the extent is in the middle of
7360                                  * our range, we'll have to search one side
7361                                  * and then the other.  Not sure if this happens
7362                                  * in real life, but no harm in coding it up
7363                                  * anyway just in case.
7364                                  */
7365                                 btrfs_release_path(&path);
7366                                 ret = check_extent_exists(root, new_start,
7367                                                           new_bytes);
7368                                 if (ret) {
7369                                         fprintf(stderr, "Right section didn't "
7370                                                 "have a record\n");
7371                                         break;
7372                                 }
7373                                 num_bytes = key.objectid - bytenr;
7374                                 goto again;
7375                         }
7376                         num_bytes = key.objectid - bytenr;
7377                 }
7378                 path.slots[0]++;
7379         }
7380         ret = 0;
7381
7382 out:
7383         if (num_bytes && !ret) {
7384                 fprintf(stderr, "There are no extents for csum range "
7385                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7386                 ret = 1;
7387         }
7388
7389         btrfs_release_path(&path);
7390         return ret;
7391 }
7392
7393 static int check_csums(struct btrfs_root *root)
7394 {
7395         struct btrfs_path path;
7396         struct extent_buffer *leaf;
7397         struct btrfs_key key;
7398         u64 offset = 0, num_bytes = 0;
7399         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7400         int errors = 0;
7401         int ret;
7402         u64 data_len;
7403         unsigned long leaf_offset;
7404
7405         root = root->fs_info->csum_root;
7406         if (!extent_buffer_uptodate(root->node)) {
7407                 fprintf(stderr, "No valid csum tree found\n");
7408                 return -ENOENT;
7409         }
7410
7411         btrfs_init_path(&path);
7412         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7413         key.type = BTRFS_EXTENT_CSUM_KEY;
7414         key.offset = 0;
7415         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7416         if (ret < 0) {
7417                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7418                 btrfs_release_path(&path);
7419                 return ret;
7420         }
7421
7422         if (ret > 0 && path.slots[0])
7423                 path.slots[0]--;
7424         ret = 0;
7425
7426         while (1) {
7427                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7428                         ret = btrfs_next_leaf(root, &path);
7429                         if (ret < 0) {
7430                                 fprintf(stderr, "Error going to next leaf "
7431                                         "%d\n", ret);
7432                                 break;
7433                         }
7434                         if (ret)
7435                                 break;
7436                 }
7437                 leaf = path.nodes[0];
7438
7439                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7440                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7441                         path.slots[0]++;
7442                         continue;
7443                 }
7444
7445                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7446                               csum_size) * root->fs_info->sectorsize;
7447                 if (!check_data_csum)
7448                         goto skip_csum_check;
7449                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7450                 ret = check_extent_csums(root, key.offset, data_len,
7451                                          leaf_offset, leaf);
7452                 if (ret)
7453                         break;
7454 skip_csum_check:
7455                 if (!num_bytes) {
7456                         offset = key.offset;
7457                 } else if (key.offset != offset + num_bytes) {
7458                         ret = check_extent_exists(root, offset, num_bytes);
7459                         if (ret) {
7460                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7461                                         "there is no extent record\n",
7462                                         offset, offset+num_bytes);
7463                                 errors++;
7464                         }
7465                         offset = key.offset;
7466                         num_bytes = 0;
7467                 }
7468                 num_bytes += data_len;
7469                 path.slots[0]++;
7470         }
7471
7472         btrfs_release_path(&path);
7473         return errors;
7474 }
7475
7476 static int is_dropped_key(struct btrfs_key *key,
7477                           struct btrfs_key *drop_key) {
7478         if (key->objectid < drop_key->objectid)
7479                 return 1;
7480         else if (key->objectid == drop_key->objectid) {
7481                 if (key->type < drop_key->type)
7482                         return 1;
7483                 else if (key->type == drop_key->type) {
7484                         if (key->offset < drop_key->offset)
7485                                 return 1;
7486                 }
7487         }
7488         return 0;
7489 }
7490
7491 /*
7492  * Here are the rules for FULL_BACKREF.
7493  *
7494  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7495  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7496  *      FULL_BACKREF set.
7497  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7498  *    if it happened after the relocation occurred since we'll have dropped the
7499  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7500  *    have no real way to know for sure.
7501  *
7502  * We process the blocks one root at a time, and we start from the lowest root
7503  * objectid and go to the highest.  So we can just lookup the owner backref for
7504  * the record and if we don't find it then we know it doesn't exist and we have
7505  * a FULL BACKREF.
7506  *
7507  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7508  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7509  * be set or not and then we can check later once we've gathered all the refs.
7510  */
7511 static int calc_extent_flag(struct cache_tree *extent_cache,
7512                            struct extent_buffer *buf,
7513                            struct root_item_record *ri,
7514                            u64 *flags)
7515 {
7516         struct extent_record *rec;
7517         struct cache_extent *cache;
7518         struct tree_backref *tback;
7519         u64 owner = 0;
7520
7521         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7522         /* we have added this extent before */
7523         if (!cache)
7524                 return -ENOENT;
7525
7526         rec = container_of(cache, struct extent_record, cache);
7527
7528         /*
7529          * Except file/reloc tree, we can not have
7530          * FULL BACKREF MODE
7531          */
7532         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7533                 goto normal;
7534         /*
7535          * root node
7536          */
7537         if (buf->start == ri->bytenr)
7538                 goto normal;
7539
7540         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7541                 goto full_backref;
7542
7543         owner = btrfs_header_owner(buf);
7544         if (owner == ri->objectid)
7545                 goto normal;
7546
7547         tback = find_tree_backref(rec, 0, owner);
7548         if (!tback)
7549                 goto full_backref;
7550 normal:
7551         *flags = 0;
7552         if (rec->flag_block_full_backref != FLAG_UNSET &&
7553             rec->flag_block_full_backref != 0)
7554                 rec->bad_full_backref = 1;
7555         return 0;
7556 full_backref:
7557         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7558         if (rec->flag_block_full_backref != FLAG_UNSET &&
7559             rec->flag_block_full_backref != 1)
7560                 rec->bad_full_backref = 1;
7561         return 0;
7562 }
7563
7564 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7565 {
7566         fprintf(stderr, "Invalid key type(");
7567         print_key_type(stderr, 0, key_type);
7568         fprintf(stderr, ") found in root(");
7569         print_objectid(stderr, rootid, 0);
7570         fprintf(stderr, ")\n");
7571 }
7572
7573 /*
7574  * Check if the key is valid with its extent buffer.
7575  *
7576  * This is a early check in case invalid key exists in a extent buffer
7577  * This is not comprehensive yet, but should prevent wrong key/item passed
7578  * further
7579  */
7580 static int check_type_with_root(u64 rootid, u8 key_type)
7581 {
7582         switch (key_type) {
7583         /* Only valid in chunk tree */
7584         case BTRFS_DEV_ITEM_KEY:
7585         case BTRFS_CHUNK_ITEM_KEY:
7586                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7587                         goto err;
7588                 break;
7589         /* valid in csum and log tree */
7590         case BTRFS_CSUM_TREE_OBJECTID:
7591                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7592                       is_fstree(rootid)))
7593                         goto err;
7594                 break;
7595         case BTRFS_EXTENT_ITEM_KEY:
7596         case BTRFS_METADATA_ITEM_KEY:
7597         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7598                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7599                         goto err;
7600                 break;
7601         case BTRFS_ROOT_ITEM_KEY:
7602                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7603                         goto err;
7604                 break;
7605         case BTRFS_DEV_EXTENT_KEY:
7606                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7607                         goto err;
7608                 break;
7609         }
7610         return 0;
7611 err:
7612         report_mismatch_key_root(key_type, rootid);
7613         return -EINVAL;
7614 }
7615
7616 static int run_next_block(struct btrfs_root *root,
7617                           struct block_info *bits,
7618                           int bits_nr,
7619                           u64 *last,
7620                           struct cache_tree *pending,
7621                           struct cache_tree *seen,
7622                           struct cache_tree *reada,
7623                           struct cache_tree *nodes,
7624                           struct cache_tree *extent_cache,
7625                           struct cache_tree *chunk_cache,
7626                           struct rb_root *dev_cache,
7627                           struct block_group_tree *block_group_cache,
7628                           struct device_extent_tree *dev_extent_cache,
7629                           struct root_item_record *ri)
7630 {
7631         struct btrfs_fs_info *fs_info = root->fs_info;
7632         struct extent_buffer *buf;
7633         struct extent_record *rec = NULL;
7634         u64 bytenr;
7635         u32 size;
7636         u64 parent;
7637         u64 owner;
7638         u64 flags;
7639         u64 ptr;
7640         u64 gen = 0;
7641         int ret = 0;
7642         int i;
7643         int nritems;
7644         struct btrfs_key key;
7645         struct cache_extent *cache;
7646         int reada_bits;
7647
7648         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7649                                     bits_nr, &reada_bits);
7650         if (nritems == 0)
7651                 return 1;
7652
7653         if (!reada_bits) {
7654                 for(i = 0; i < nritems; i++) {
7655                         ret = add_cache_extent(reada, bits[i].start,
7656                                                bits[i].size);
7657                         if (ret == -EEXIST)
7658                                 continue;
7659
7660                         /* fixme, get the parent transid */
7661                         readahead_tree_block(fs_info, bits[i].start,
7662                                              bits[i].size, 0);
7663                 }
7664         }
7665         *last = bits[0].start;
7666         bytenr = bits[0].start;
7667         size = bits[0].size;
7668
7669         cache = lookup_cache_extent(pending, bytenr, size);
7670         if (cache) {
7671                 remove_cache_extent(pending, cache);
7672                 free(cache);
7673         }
7674         cache = lookup_cache_extent(reada, bytenr, size);
7675         if (cache) {
7676                 remove_cache_extent(reada, cache);
7677                 free(cache);
7678         }
7679         cache = lookup_cache_extent(nodes, bytenr, size);
7680         if (cache) {
7681                 remove_cache_extent(nodes, cache);
7682                 free(cache);
7683         }
7684         cache = lookup_cache_extent(extent_cache, bytenr, size);
7685         if (cache) {
7686                 rec = container_of(cache, struct extent_record, cache);
7687                 gen = rec->parent_generation;
7688         }
7689
7690         /* fixme, get the real parent transid */
7691         buf = read_tree_block(root->fs_info, bytenr, size, gen);
7692         if (!extent_buffer_uptodate(buf)) {
7693                 record_bad_block_io(root->fs_info,
7694                                     extent_cache, bytenr, size);
7695                 goto out;
7696         }
7697
7698         nritems = btrfs_header_nritems(buf);
7699
7700         flags = 0;
7701         if (!init_extent_tree) {
7702                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7703                                        btrfs_header_level(buf), 1, NULL,
7704                                        &flags);
7705                 if (ret < 0) {
7706                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7707                         if (ret < 0) {
7708                                 fprintf(stderr, "Couldn't calc extent flags\n");
7709                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7710                         }
7711                 }
7712         } else {
7713                 flags = 0;
7714                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7715                 if (ret < 0) {
7716                         fprintf(stderr, "Couldn't calc extent flags\n");
7717                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7718                 }
7719         }
7720
7721         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7722                 if (ri != NULL &&
7723                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7724                     ri->objectid == btrfs_header_owner(buf)) {
7725                         /*
7726                          * Ok we got to this block from it's original owner and
7727                          * we have FULL_BACKREF set.  Relocation can leave
7728                          * converted blocks over so this is altogether possible,
7729                          * however it's not possible if the generation > the
7730                          * last snapshot, so check for this case.
7731                          */
7732                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7733                             btrfs_header_generation(buf) > ri->last_snapshot) {
7734                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7735                                 rec->bad_full_backref = 1;
7736                         }
7737                 }
7738         } else {
7739                 if (ri != NULL &&
7740                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7741                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7742                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7743                         rec->bad_full_backref = 1;
7744                 }
7745         }
7746
7747         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7748                 rec->flag_block_full_backref = 1;
7749                 parent = bytenr;
7750                 owner = 0;
7751         } else {
7752                 rec->flag_block_full_backref = 0;
7753                 parent = 0;
7754                 owner = btrfs_header_owner(buf);
7755         }
7756
7757         ret = check_block(root, extent_cache, buf, flags);
7758         if (ret)
7759                 goto out;
7760
7761         if (btrfs_is_leaf(buf)) {
7762                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7763                 for (i = 0; i < nritems; i++) {
7764                         struct btrfs_file_extent_item *fi;
7765                         btrfs_item_key_to_cpu(buf, &key, i);
7766                         /*
7767                          * Check key type against the leaf owner.
7768                          * Could filter quite a lot of early error if
7769                          * owner is correct
7770                          */
7771                         if (check_type_with_root(btrfs_header_owner(buf),
7772                                                  key.type)) {
7773                                 fprintf(stderr, "ignoring invalid key\n");
7774                                 continue;
7775                         }
7776                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7777                                 process_extent_item(root, extent_cache, buf,
7778                                                     i);
7779                                 continue;
7780                         }
7781                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7782                                 process_extent_item(root, extent_cache, buf,
7783                                                     i);
7784                                 continue;
7785                         }
7786                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7787                                 total_csum_bytes +=
7788                                         btrfs_item_size_nr(buf, i);
7789                                 continue;
7790                         }
7791                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7792                                 process_chunk_item(chunk_cache, &key, buf, i);
7793                                 continue;
7794                         }
7795                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7796                                 process_device_item(dev_cache, &key, buf, i);
7797                                 continue;
7798                         }
7799                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7800                                 process_block_group_item(block_group_cache,
7801                                         &key, buf, i);
7802                                 continue;
7803                         }
7804                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7805                                 process_device_extent_item(dev_extent_cache,
7806                                         &key, buf, i);
7807                                 continue;
7808
7809                         }
7810                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7811 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7812                                 process_extent_ref_v0(extent_cache, buf, i);
7813 #else
7814                                 BUG();
7815 #endif
7816                                 continue;
7817                         }
7818
7819                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7820                                 ret = add_tree_backref(extent_cache,
7821                                                 key.objectid, 0, key.offset, 0);
7822                                 if (ret < 0)
7823                                         error(
7824                                 "add_tree_backref failed (leaf tree block): %s",
7825                                               strerror(-ret));
7826                                 continue;
7827                         }
7828                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7829                                 ret = add_tree_backref(extent_cache,
7830                                                 key.objectid, key.offset, 0, 0);
7831                                 if (ret < 0)
7832                                         error(
7833                                 "add_tree_backref failed (leaf shared block): %s",
7834                                               strerror(-ret));
7835                                 continue;
7836                         }
7837                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7838                                 struct btrfs_extent_data_ref *ref;
7839                                 ref = btrfs_item_ptr(buf, i,
7840                                                 struct btrfs_extent_data_ref);
7841                                 add_data_backref(extent_cache,
7842                                         key.objectid, 0,
7843                                         btrfs_extent_data_ref_root(buf, ref),
7844                                         btrfs_extent_data_ref_objectid(buf,
7845                                                                        ref),
7846                                         btrfs_extent_data_ref_offset(buf, ref),
7847                                         btrfs_extent_data_ref_count(buf, ref),
7848                                         0, root->fs_info->sectorsize);
7849                                 continue;
7850                         }
7851                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7852                                 struct btrfs_shared_data_ref *ref;
7853                                 ref = btrfs_item_ptr(buf, i,
7854                                                 struct btrfs_shared_data_ref);
7855                                 add_data_backref(extent_cache,
7856                                         key.objectid, key.offset, 0, 0, 0,
7857                                         btrfs_shared_data_ref_count(buf, ref),
7858                                         0, root->fs_info->sectorsize);
7859                                 continue;
7860                         }
7861                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7862                                 struct bad_item *bad;
7863
7864                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7865                                         continue;
7866                                 if (!owner)
7867                                         continue;
7868                                 bad = malloc(sizeof(struct bad_item));
7869                                 if (!bad)
7870                                         continue;
7871                                 INIT_LIST_HEAD(&bad->list);
7872                                 memcpy(&bad->key, &key,
7873                                        sizeof(struct btrfs_key));
7874                                 bad->root_id = owner;
7875                                 list_add_tail(&bad->list, &delete_items);
7876                                 continue;
7877                         }
7878                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7879                                 continue;
7880                         fi = btrfs_item_ptr(buf, i,
7881                                             struct btrfs_file_extent_item);
7882                         if (btrfs_file_extent_type(buf, fi) ==
7883                             BTRFS_FILE_EXTENT_INLINE)
7884                                 continue;
7885                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7886                                 continue;
7887
7888                         data_bytes_allocated +=
7889                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7890                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7891                                 abort();
7892                         }
7893                         data_bytes_referenced +=
7894                                 btrfs_file_extent_num_bytes(buf, fi);
7895                         add_data_backref(extent_cache,
7896                                 btrfs_file_extent_disk_bytenr(buf, fi),
7897                                 parent, owner, key.objectid, key.offset -
7898                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7899                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7900                 }
7901         } else {
7902                 int level;
7903                 struct btrfs_key first_key;
7904
7905                 first_key.objectid = 0;
7906
7907                 if (nritems > 0)
7908                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7909                 level = btrfs_header_level(buf);
7910                 for (i = 0; i < nritems; i++) {
7911                         struct extent_record tmpl;
7912
7913                         ptr = btrfs_node_blockptr(buf, i);
7914                         size = root->fs_info->nodesize;
7915                         btrfs_node_key_to_cpu(buf, &key, i);
7916                         if (ri != NULL) {
7917                                 if ((level == ri->drop_level)
7918                                     && is_dropped_key(&key, &ri->drop_key)) {
7919                                         continue;
7920                                 }
7921                         }
7922
7923                         memset(&tmpl, 0, sizeof(tmpl));
7924                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7925                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7926                         tmpl.start = ptr;
7927                         tmpl.nr = size;
7928                         tmpl.refs = 1;
7929                         tmpl.metadata = 1;
7930                         tmpl.max_size = size;
7931                         ret = add_extent_rec(extent_cache, &tmpl);
7932                         if (ret < 0)
7933                                 goto out;
7934
7935                         ret = add_tree_backref(extent_cache, ptr, parent,
7936                                         owner, 1);
7937                         if (ret < 0) {
7938                                 error(
7939                                 "add_tree_backref failed (non-leaf block): %s",
7940                                       strerror(-ret));
7941                                 continue;
7942                         }
7943
7944                         if (level > 1) {
7945                                 add_pending(nodes, seen, ptr, size);
7946                         } else {
7947                                 add_pending(pending, seen, ptr, size);
7948                         }
7949                 }
7950                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7951                                       nritems) * sizeof(struct btrfs_key_ptr);
7952         }
7953         total_btree_bytes += buf->len;
7954         if (fs_root_objectid(btrfs_header_owner(buf)))
7955                 total_fs_tree_bytes += buf->len;
7956         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7957                 total_extent_tree_bytes += buf->len;
7958         if (!found_old_backref &&
7959             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7960             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7961             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7962                 found_old_backref = 1;
7963 out:
7964         free_extent_buffer(buf);
7965         return ret;
7966 }
7967
7968 static int add_root_to_pending(struct extent_buffer *buf,
7969                                struct cache_tree *extent_cache,
7970                                struct cache_tree *pending,
7971                                struct cache_tree *seen,
7972                                struct cache_tree *nodes,
7973                                u64 objectid)
7974 {
7975         struct extent_record tmpl;
7976         int ret;
7977
7978         if (btrfs_header_level(buf) > 0)
7979                 add_pending(nodes, seen, buf->start, buf->len);
7980         else
7981                 add_pending(pending, seen, buf->start, buf->len);
7982
7983         memset(&tmpl, 0, sizeof(tmpl));
7984         tmpl.start = buf->start;
7985         tmpl.nr = buf->len;
7986         tmpl.is_root = 1;
7987         tmpl.refs = 1;
7988         tmpl.metadata = 1;
7989         tmpl.max_size = buf->len;
7990         add_extent_rec(extent_cache, &tmpl);
7991
7992         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7993             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7994                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7995                                 0, 1);
7996         else
7997                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7998                                 1);
7999         return ret;
8000 }
8001
8002 /* as we fix the tree, we might be deleting blocks that
8003  * we're tracking for repair.  This hook makes sure we
8004  * remove any backrefs for blocks as we are fixing them.
8005  */
8006 static int free_extent_hook(struct btrfs_trans_handle *trans,
8007                             struct btrfs_root *root,
8008                             u64 bytenr, u64 num_bytes, u64 parent,
8009                             u64 root_objectid, u64 owner, u64 offset,
8010                             int refs_to_drop)
8011 {
8012         struct extent_record *rec;
8013         struct cache_extent *cache;
8014         int is_data;
8015         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8016
8017         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8018         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8019         if (!cache)
8020                 return 0;
8021
8022         rec = container_of(cache, struct extent_record, cache);
8023         if (is_data) {
8024                 struct data_backref *back;
8025                 back = find_data_backref(rec, parent, root_objectid, owner,
8026                                          offset, 1, bytenr, num_bytes);
8027                 if (!back)
8028                         goto out;
8029                 if (back->node.found_ref) {
8030                         back->found_ref -= refs_to_drop;
8031                         if (rec->refs)
8032                                 rec->refs -= refs_to_drop;
8033                 }
8034                 if (back->node.found_extent_tree) {
8035                         back->num_refs -= refs_to_drop;
8036                         if (rec->extent_item_refs)
8037                                 rec->extent_item_refs -= refs_to_drop;
8038                 }
8039                 if (back->found_ref == 0)
8040                         back->node.found_ref = 0;
8041                 if (back->num_refs == 0)
8042                         back->node.found_extent_tree = 0;
8043
8044                 if (!back->node.found_extent_tree && back->node.found_ref) {
8045                         list_del(&back->node.list);
8046                         free(back);
8047                 }
8048         } else {
8049                 struct tree_backref *back;
8050                 back = find_tree_backref(rec, parent, root_objectid);
8051                 if (!back)
8052                         goto out;
8053                 if (back->node.found_ref) {
8054                         if (rec->refs)
8055                                 rec->refs--;
8056                         back->node.found_ref = 0;
8057                 }
8058                 if (back->node.found_extent_tree) {
8059                         if (rec->extent_item_refs)
8060                                 rec->extent_item_refs--;
8061                         back->node.found_extent_tree = 0;
8062                 }
8063                 if (!back->node.found_extent_tree && back->node.found_ref) {
8064                         list_del(&back->node.list);
8065                         free(back);
8066                 }
8067         }
8068         maybe_free_extent_rec(extent_cache, rec);
8069 out:
8070         return 0;
8071 }
8072
8073 static int delete_extent_records(struct btrfs_trans_handle *trans,
8074                                  struct btrfs_root *root,
8075                                  struct btrfs_path *path,
8076                                  u64 bytenr)
8077 {
8078         struct btrfs_key key;
8079         struct btrfs_key found_key;
8080         struct extent_buffer *leaf;
8081         int ret;
8082         int slot;
8083
8084
8085         key.objectid = bytenr;
8086         key.type = (u8)-1;
8087         key.offset = (u64)-1;
8088
8089         while(1) {
8090                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8091                                         &key, path, 0, 1);
8092                 if (ret < 0)
8093                         break;
8094
8095                 if (ret > 0) {
8096                         ret = 0;
8097                         if (path->slots[0] == 0)
8098                                 break;
8099                         path->slots[0]--;
8100                 }
8101                 ret = 0;
8102
8103                 leaf = path->nodes[0];
8104                 slot = path->slots[0];
8105
8106                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8107                 if (found_key.objectid != bytenr)
8108                         break;
8109
8110                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8111                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8112                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8113                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8114                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8115                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8116                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8117                         btrfs_release_path(path);
8118                         if (found_key.type == 0) {
8119                                 if (found_key.offset == 0)
8120                                         break;
8121                                 key.offset = found_key.offset - 1;
8122                                 key.type = found_key.type;
8123                         }
8124                         key.type = found_key.type - 1;
8125                         key.offset = (u64)-1;
8126                         continue;
8127                 }
8128
8129                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8130                         found_key.objectid, found_key.type, found_key.offset);
8131
8132                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8133                 if (ret)
8134                         break;
8135                 btrfs_release_path(path);
8136
8137                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8138                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8139                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8140                                 found_key.offset : root->fs_info->nodesize;
8141
8142                         ret = btrfs_update_block_group(trans, root, bytenr,
8143                                                        bytes, 0, 0);
8144                         if (ret)
8145                                 break;
8146                 }
8147         }
8148
8149         btrfs_release_path(path);
8150         return ret;
8151 }
8152
8153 /*
8154  * for a single backref, this will allocate a new extent
8155  * and add the backref to it.
8156  */
8157 static int record_extent(struct btrfs_trans_handle *trans,
8158                          struct btrfs_fs_info *info,
8159                          struct btrfs_path *path,
8160                          struct extent_record *rec,
8161                          struct extent_backref *back,
8162                          int allocated, u64 flags)
8163 {
8164         int ret = 0;
8165         struct btrfs_root *extent_root = info->extent_root;
8166         struct extent_buffer *leaf;
8167         struct btrfs_key ins_key;
8168         struct btrfs_extent_item *ei;
8169         struct data_backref *dback;
8170         struct btrfs_tree_block_info *bi;
8171
8172         if (!back->is_data)
8173                 rec->max_size = max_t(u64, rec->max_size,
8174                                     info->nodesize);
8175
8176         if (!allocated) {
8177                 u32 item_size = sizeof(*ei);
8178
8179                 if (!back->is_data)
8180                         item_size += sizeof(*bi);
8181
8182                 ins_key.objectid = rec->start;
8183                 ins_key.offset = rec->max_size;
8184                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8185
8186                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8187                                         &ins_key, item_size);
8188                 if (ret)
8189                         goto fail;
8190
8191                 leaf = path->nodes[0];
8192                 ei = btrfs_item_ptr(leaf, path->slots[0],
8193                                     struct btrfs_extent_item);
8194
8195                 btrfs_set_extent_refs(leaf, ei, 0);
8196                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8197
8198                 if (back->is_data) {
8199                         btrfs_set_extent_flags(leaf, ei,
8200                                                BTRFS_EXTENT_FLAG_DATA);
8201                 } else {
8202                         struct btrfs_disk_key copy_key;;
8203
8204                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8205                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8206                                              sizeof(*bi));
8207
8208                         btrfs_set_disk_key_objectid(&copy_key,
8209                                                     rec->info_objectid);
8210                         btrfs_set_disk_key_type(&copy_key, 0);
8211                         btrfs_set_disk_key_offset(&copy_key, 0);
8212
8213                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8214                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8215
8216                         btrfs_set_extent_flags(leaf, ei,
8217                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8218                 }
8219
8220                 btrfs_mark_buffer_dirty(leaf);
8221                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8222                                                rec->max_size, 1, 0);
8223                 if (ret)
8224                         goto fail;
8225                 btrfs_release_path(path);
8226         }
8227
8228         if (back->is_data) {
8229                 u64 parent;
8230                 int i;
8231
8232                 dback = to_data_backref(back);
8233                 if (back->full_backref)
8234                         parent = dback->parent;
8235                 else
8236                         parent = 0;
8237
8238                 for (i = 0; i < dback->found_ref; i++) {
8239                         /* if parent != 0, we're doing a full backref
8240                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8241                          * just makes the backref allocator create a data
8242                          * backref
8243                          */
8244                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8245                                                    rec->start, rec->max_size,
8246                                                    parent,
8247                                                    dback->root,
8248                                                    parent ?
8249                                                    BTRFS_FIRST_FREE_OBJECTID :
8250                                                    dback->owner,
8251                                                    dback->offset);
8252                         if (ret)
8253                                 break;
8254                 }
8255                 fprintf(stderr, "adding new data backref"
8256                                 " on %llu %s %llu owner %llu"
8257                                 " offset %llu found %d\n",
8258                                 (unsigned long long)rec->start,
8259                                 back->full_backref ?
8260                                 "parent" : "root",
8261                                 back->full_backref ?
8262                                 (unsigned long long)parent :
8263                                 (unsigned long long)dback->root,
8264                                 (unsigned long long)dback->owner,
8265                                 (unsigned long long)dback->offset,
8266                                 dback->found_ref);
8267         } else {
8268                 u64 parent;
8269                 struct tree_backref *tback;
8270
8271                 tback = to_tree_backref(back);
8272                 if (back->full_backref)
8273                         parent = tback->parent;
8274                 else
8275                         parent = 0;
8276
8277                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8278                                            rec->start, rec->max_size,
8279                                            parent, tback->root, 0, 0);
8280                 fprintf(stderr, "adding new tree backref on "
8281                         "start %llu len %llu parent %llu root %llu\n",
8282                         rec->start, rec->max_size, parent, tback->root);
8283         }
8284 fail:
8285         btrfs_release_path(path);
8286         return ret;
8287 }
8288
8289 static struct extent_entry *find_entry(struct list_head *entries,
8290                                        u64 bytenr, u64 bytes)
8291 {
8292         struct extent_entry *entry = NULL;
8293
8294         list_for_each_entry(entry, entries, list) {
8295                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8296                         return entry;
8297         }
8298
8299         return NULL;
8300 }
8301
8302 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8303 {
8304         struct extent_entry *entry, *best = NULL, *prev = NULL;
8305
8306         list_for_each_entry(entry, entries, list) {
8307                 /*
8308                  * If there are as many broken entries as entries then we know
8309                  * not to trust this particular entry.
8310                  */
8311                 if (entry->broken == entry->count)
8312                         continue;
8313
8314                 /*
8315                  * Special case, when there are only two entries and 'best' is
8316                  * the first one
8317                  */
8318                 if (!prev) {
8319                         best = entry;
8320                         prev = entry;
8321                         continue;
8322                 }
8323
8324                 /*
8325                  * If our current entry == best then we can't be sure our best
8326                  * is really the best, so we need to keep searching.
8327                  */
8328                 if (best && best->count == entry->count) {
8329                         prev = entry;
8330                         best = NULL;
8331                         continue;
8332                 }
8333
8334                 /* Prev == entry, not good enough, have to keep searching */
8335                 if (!prev->broken && prev->count == entry->count)
8336                         continue;
8337
8338                 if (!best)
8339                         best = (prev->count > entry->count) ? prev : entry;
8340                 else if (best->count < entry->count)
8341                         best = entry;
8342                 prev = entry;
8343         }
8344
8345         return best;
8346 }
8347
8348 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8349                       struct data_backref *dback, struct extent_entry *entry)
8350 {
8351         struct btrfs_trans_handle *trans;
8352         struct btrfs_root *root;
8353         struct btrfs_file_extent_item *fi;
8354         struct extent_buffer *leaf;
8355         struct btrfs_key key;
8356         u64 bytenr, bytes;
8357         int ret, err;
8358
8359         key.objectid = dback->root;
8360         key.type = BTRFS_ROOT_ITEM_KEY;
8361         key.offset = (u64)-1;
8362         root = btrfs_read_fs_root(info, &key);
8363         if (IS_ERR(root)) {
8364                 fprintf(stderr, "Couldn't find root for our ref\n");
8365                 return -EINVAL;
8366         }
8367
8368         /*
8369          * The backref points to the original offset of the extent if it was
8370          * split, so we need to search down to the offset we have and then walk
8371          * forward until we find the backref we're looking for.
8372          */
8373         key.objectid = dback->owner;
8374         key.type = BTRFS_EXTENT_DATA_KEY;
8375         key.offset = dback->offset;
8376         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8377         if (ret < 0) {
8378                 fprintf(stderr, "Error looking up ref %d\n", ret);
8379                 return ret;
8380         }
8381
8382         while (1) {
8383                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8384                         ret = btrfs_next_leaf(root, path);
8385                         if (ret) {
8386                                 fprintf(stderr, "Couldn't find our ref, next\n");
8387                                 return -EINVAL;
8388                         }
8389                 }
8390                 leaf = path->nodes[0];
8391                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8392                 if (key.objectid != dback->owner ||
8393                     key.type != BTRFS_EXTENT_DATA_KEY) {
8394                         fprintf(stderr, "Couldn't find our ref, search\n");
8395                         return -EINVAL;
8396                 }
8397                 fi = btrfs_item_ptr(leaf, path->slots[0],
8398                                     struct btrfs_file_extent_item);
8399                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8400                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8401
8402                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8403                         break;
8404                 path->slots[0]++;
8405         }
8406
8407         btrfs_release_path(path);
8408
8409         trans = btrfs_start_transaction(root, 1);
8410         if (IS_ERR(trans))
8411                 return PTR_ERR(trans);
8412
8413         /*
8414          * Ok we have the key of the file extent we want to fix, now we can cow
8415          * down to the thing and fix it.
8416          */
8417         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8418         if (ret < 0) {
8419                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8420                         key.objectid, key.type, key.offset, ret);
8421                 goto out;
8422         }
8423         if (ret > 0) {
8424                 fprintf(stderr, "Well that's odd, we just found this key "
8425                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8426                         key.offset);
8427                 ret = -EINVAL;
8428                 goto out;
8429         }
8430         leaf = path->nodes[0];
8431         fi = btrfs_item_ptr(leaf, path->slots[0],
8432                             struct btrfs_file_extent_item);
8433
8434         if (btrfs_file_extent_compression(leaf, fi) &&
8435             dback->disk_bytenr != entry->bytenr) {
8436                 fprintf(stderr, "Ref doesn't match the record start and is "
8437                         "compressed, please take a btrfs-image of this file "
8438                         "system and send it to a btrfs developer so they can "
8439                         "complete this functionality for bytenr %Lu\n",
8440                         dback->disk_bytenr);
8441                 ret = -EINVAL;
8442                 goto out;
8443         }
8444
8445         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8446                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8447         } else if (dback->disk_bytenr > entry->bytenr) {
8448                 u64 off_diff, offset;
8449
8450                 off_diff = dback->disk_bytenr - entry->bytenr;
8451                 offset = btrfs_file_extent_offset(leaf, fi);
8452                 if (dback->disk_bytenr + offset +
8453                     btrfs_file_extent_num_bytes(leaf, fi) >
8454                     entry->bytenr + entry->bytes) {
8455                         fprintf(stderr, "Ref is past the entry end, please "
8456                                 "take a btrfs-image of this file system and "
8457                                 "send it to a btrfs developer, ref %Lu\n",
8458                                 dback->disk_bytenr);
8459                         ret = -EINVAL;
8460                         goto out;
8461                 }
8462                 offset += off_diff;
8463                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8464                 btrfs_set_file_extent_offset(leaf, fi, offset);
8465         } else if (dback->disk_bytenr < entry->bytenr) {
8466                 u64 offset;
8467
8468                 offset = btrfs_file_extent_offset(leaf, fi);
8469                 if (dback->disk_bytenr + offset < entry->bytenr) {
8470                         fprintf(stderr, "Ref is before the entry start, please"
8471                                 " take a btrfs-image of this file system and "
8472                                 "send it to a btrfs developer, ref %Lu\n",
8473                                 dback->disk_bytenr);
8474                         ret = -EINVAL;
8475                         goto out;
8476                 }
8477
8478                 offset += dback->disk_bytenr;
8479                 offset -= entry->bytenr;
8480                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8481                 btrfs_set_file_extent_offset(leaf, fi, offset);
8482         }
8483
8484         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8485
8486         /*
8487          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8488          * only do this if we aren't using compression, otherwise it's a
8489          * trickier case.
8490          */
8491         if (!btrfs_file_extent_compression(leaf, fi))
8492                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8493         else
8494                 printf("ram bytes may be wrong?\n");
8495         btrfs_mark_buffer_dirty(leaf);
8496 out:
8497         err = btrfs_commit_transaction(trans, root);
8498         btrfs_release_path(path);
8499         return ret ? ret : err;
8500 }
8501
8502 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8503                            struct extent_record *rec)
8504 {
8505         struct extent_backref *back;
8506         struct data_backref *dback;
8507         struct extent_entry *entry, *best = NULL;
8508         LIST_HEAD(entries);
8509         int nr_entries = 0;
8510         int broken_entries = 0;
8511         int ret = 0;
8512         short mismatch = 0;
8513
8514         /*
8515          * Metadata is easy and the backrefs should always agree on bytenr and
8516          * size, if not we've got bigger issues.
8517          */
8518         if (rec->metadata)
8519                 return 0;
8520
8521         list_for_each_entry(back, &rec->backrefs, list) {
8522                 if (back->full_backref || !back->is_data)
8523                         continue;
8524
8525                 dback = to_data_backref(back);
8526
8527                 /*
8528                  * We only pay attention to backrefs that we found a real
8529                  * backref for.
8530                  */
8531                 if (dback->found_ref == 0)
8532                         continue;
8533
8534                 /*
8535                  * For now we only catch when the bytes don't match, not the
8536                  * bytenr.  We can easily do this at the same time, but I want
8537                  * to have a fs image to test on before we just add repair
8538                  * functionality willy-nilly so we know we won't screw up the
8539                  * repair.
8540                  */
8541
8542                 entry = find_entry(&entries, dback->disk_bytenr,
8543                                    dback->bytes);
8544                 if (!entry) {
8545                         entry = malloc(sizeof(struct extent_entry));
8546                         if (!entry) {
8547                                 ret = -ENOMEM;
8548                                 goto out;
8549                         }
8550                         memset(entry, 0, sizeof(*entry));
8551                         entry->bytenr = dback->disk_bytenr;
8552                         entry->bytes = dback->bytes;
8553                         list_add_tail(&entry->list, &entries);
8554                         nr_entries++;
8555                 }
8556
8557                 /*
8558                  * If we only have on entry we may think the entries agree when
8559                  * in reality they don't so we have to do some extra checking.
8560                  */
8561                 if (dback->disk_bytenr != rec->start ||
8562                     dback->bytes != rec->nr || back->broken)
8563                         mismatch = 1;
8564
8565                 if (back->broken) {
8566                         entry->broken++;
8567                         broken_entries++;
8568                 }
8569
8570                 entry->count++;
8571         }
8572
8573         /* Yay all the backrefs agree, carry on good sir */
8574         if (nr_entries <= 1 && !mismatch)
8575                 goto out;
8576
8577         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8578                 "%Lu\n", rec->start);
8579
8580         /*
8581          * First we want to see if the backrefs can agree amongst themselves who
8582          * is right, so figure out which one of the entries has the highest
8583          * count.
8584          */
8585         best = find_most_right_entry(&entries);
8586
8587         /*
8588          * Ok so we may have an even split between what the backrefs think, so
8589          * this is where we use the extent ref to see what it thinks.
8590          */
8591         if (!best) {
8592                 entry = find_entry(&entries, rec->start, rec->nr);
8593                 if (!entry && (!broken_entries || !rec->found_rec)) {
8594                         fprintf(stderr, "Backrefs don't agree with each other "
8595                                 "and extent record doesn't agree with anybody,"
8596                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8597                                 rec->start, rec->nr);
8598                         ret = -EINVAL;
8599                         goto out;
8600                 } else if (!entry) {
8601                         /*
8602                          * Ok our backrefs were broken, we'll assume this is the
8603                          * correct value and add an entry for this range.
8604                          */
8605                         entry = malloc(sizeof(struct extent_entry));
8606                         if (!entry) {
8607                                 ret = -ENOMEM;
8608                                 goto out;
8609                         }
8610                         memset(entry, 0, sizeof(*entry));
8611                         entry->bytenr = rec->start;
8612                         entry->bytes = rec->nr;
8613                         list_add_tail(&entry->list, &entries);
8614                         nr_entries++;
8615                 }
8616                 entry->count++;
8617                 best = find_most_right_entry(&entries);
8618                 if (!best) {
8619                         fprintf(stderr, "Backrefs and extent record evenly "
8620                                 "split on who is right, this is going to "
8621                                 "require user input to fix bytenr %Lu bytes "
8622                                 "%Lu\n", rec->start, rec->nr);
8623                         ret = -EINVAL;
8624                         goto out;
8625                 }
8626         }
8627
8628         /*
8629          * I don't think this can happen currently as we'll abort() if we catch
8630          * this case higher up, but in case somebody removes that we still can't
8631          * deal with it properly here yet, so just bail out of that's the case.
8632          */
8633         if (best->bytenr != rec->start) {
8634                 fprintf(stderr, "Extent start and backref starts don't match, "
8635                         "please use btrfs-image on this file system and send "
8636                         "it to a btrfs developer so they can make fsck fix "
8637                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8638                         rec->start, rec->nr);
8639                 ret = -EINVAL;
8640                 goto out;
8641         }
8642
8643         /*
8644          * Ok great we all agreed on an extent record, let's go find the real
8645          * references and fix up the ones that don't match.
8646          */
8647         list_for_each_entry(back, &rec->backrefs, list) {
8648                 if (back->full_backref || !back->is_data)
8649                         continue;
8650
8651                 dback = to_data_backref(back);
8652
8653                 /*
8654                  * Still ignoring backrefs that don't have a real ref attached
8655                  * to them.
8656                  */
8657                 if (dback->found_ref == 0)
8658                         continue;
8659
8660                 if (dback->bytes == best->bytes &&
8661                     dback->disk_bytenr == best->bytenr)
8662                         continue;
8663
8664                 ret = repair_ref(info, path, dback, best);
8665                 if (ret)
8666                         goto out;
8667         }
8668
8669         /*
8670          * Ok we messed with the actual refs, which means we need to drop our
8671          * entire cache and go back and rescan.  I know this is a huge pain and
8672          * adds a lot of extra work, but it's the only way to be safe.  Once all
8673          * the backrefs agree we may not need to do anything to the extent
8674          * record itself.
8675          */
8676         ret = -EAGAIN;
8677 out:
8678         while (!list_empty(&entries)) {
8679                 entry = list_entry(entries.next, struct extent_entry, list);
8680                 list_del_init(&entry->list);
8681                 free(entry);
8682         }
8683         return ret;
8684 }
8685
8686 static int process_duplicates(struct cache_tree *extent_cache,
8687                               struct extent_record *rec)
8688 {
8689         struct extent_record *good, *tmp;
8690         struct cache_extent *cache;
8691         int ret;
8692
8693         /*
8694          * If we found a extent record for this extent then return, or if we
8695          * have more than one duplicate we are likely going to need to delete
8696          * something.
8697          */
8698         if (rec->found_rec || rec->num_duplicates > 1)
8699                 return 0;
8700
8701         /* Shouldn't happen but just in case */
8702         BUG_ON(!rec->num_duplicates);
8703
8704         /*
8705          * So this happens if we end up with a backref that doesn't match the
8706          * actual extent entry.  So either the backref is bad or the extent
8707          * entry is bad.  Either way we want to have the extent_record actually
8708          * reflect what we found in the extent_tree, so we need to take the
8709          * duplicate out and use that as the extent_record since the only way we
8710          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8711          */
8712         remove_cache_extent(extent_cache, &rec->cache);
8713
8714         good = to_extent_record(rec->dups.next);
8715         list_del_init(&good->list);
8716         INIT_LIST_HEAD(&good->backrefs);
8717         INIT_LIST_HEAD(&good->dups);
8718         good->cache.start = good->start;
8719         good->cache.size = good->nr;
8720         good->content_checked = 0;
8721         good->owner_ref_checked = 0;
8722         good->num_duplicates = 0;
8723         good->refs = rec->refs;
8724         list_splice_init(&rec->backrefs, &good->backrefs);
8725         while (1) {
8726                 cache = lookup_cache_extent(extent_cache, good->start,
8727                                             good->nr);
8728                 if (!cache)
8729                         break;
8730                 tmp = container_of(cache, struct extent_record, cache);
8731
8732                 /*
8733                  * If we find another overlapping extent and it's found_rec is
8734                  * set then it's a duplicate and we need to try and delete
8735                  * something.
8736                  */
8737                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8738                         if (list_empty(&good->list))
8739                                 list_add_tail(&good->list,
8740                                               &duplicate_extents);
8741                         good->num_duplicates += tmp->num_duplicates + 1;
8742                         list_splice_init(&tmp->dups, &good->dups);
8743                         list_del_init(&tmp->list);
8744                         list_add_tail(&tmp->list, &good->dups);
8745                         remove_cache_extent(extent_cache, &tmp->cache);
8746                         continue;
8747                 }
8748
8749                 /*
8750                  * Ok we have another non extent item backed extent rec, so lets
8751                  * just add it to this extent and carry on like we did above.
8752                  */
8753                 good->refs += tmp->refs;
8754                 list_splice_init(&tmp->backrefs, &good->backrefs);
8755                 remove_cache_extent(extent_cache, &tmp->cache);
8756                 free(tmp);
8757         }
8758         ret = insert_cache_extent(extent_cache, &good->cache);
8759         BUG_ON(ret);
8760         free(rec);
8761         return good->num_duplicates ? 0 : 1;
8762 }
8763
8764 static int delete_duplicate_records(struct btrfs_root *root,
8765                                     struct extent_record *rec)
8766 {
8767         struct btrfs_trans_handle *trans;
8768         LIST_HEAD(delete_list);
8769         struct btrfs_path path;
8770         struct extent_record *tmp, *good, *n;
8771         int nr_del = 0;
8772         int ret = 0, err;
8773         struct btrfs_key key;
8774
8775         btrfs_init_path(&path);
8776
8777         good = rec;
8778         /* Find the record that covers all of the duplicates. */
8779         list_for_each_entry(tmp, &rec->dups, list) {
8780                 if (good->start < tmp->start)
8781                         continue;
8782                 if (good->nr > tmp->nr)
8783                         continue;
8784
8785                 if (tmp->start + tmp->nr < good->start + good->nr) {
8786                         fprintf(stderr, "Ok we have overlapping extents that "
8787                                 "aren't completely covered by each other, this "
8788                                 "is going to require more careful thought.  "
8789                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8790                                 tmp->start, tmp->nr, good->start, good->nr);
8791                         abort();
8792                 }
8793                 good = tmp;
8794         }
8795
8796         if (good != rec)
8797                 list_add_tail(&rec->list, &delete_list);
8798
8799         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8800                 if (tmp == good)
8801                         continue;
8802                 list_move_tail(&tmp->list, &delete_list);
8803         }
8804
8805         root = root->fs_info->extent_root;
8806         trans = btrfs_start_transaction(root, 1);
8807         if (IS_ERR(trans)) {
8808                 ret = PTR_ERR(trans);
8809                 goto out;
8810         }
8811
8812         list_for_each_entry(tmp, &delete_list, list) {
8813                 if (tmp->found_rec == 0)
8814                         continue;
8815                 key.objectid = tmp->start;
8816                 key.type = BTRFS_EXTENT_ITEM_KEY;
8817                 key.offset = tmp->nr;
8818
8819                 /* Shouldn't happen but just in case */
8820                 if (tmp->metadata) {
8821                         fprintf(stderr, "Well this shouldn't happen, extent "
8822                                 "record overlaps but is metadata? "
8823                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8824                         abort();
8825                 }
8826
8827                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8828                 if (ret) {
8829                         if (ret > 0)
8830                                 ret = -EINVAL;
8831                         break;
8832                 }
8833                 ret = btrfs_del_item(trans, root, &path);
8834                 if (ret)
8835                         break;
8836                 btrfs_release_path(&path);
8837                 nr_del++;
8838         }
8839         err = btrfs_commit_transaction(trans, root);
8840         if (err && !ret)
8841                 ret = err;
8842 out:
8843         while (!list_empty(&delete_list)) {
8844                 tmp = to_extent_record(delete_list.next);
8845                 list_del_init(&tmp->list);
8846                 if (tmp == rec)
8847                         continue;
8848                 free(tmp);
8849         }
8850
8851         while (!list_empty(&rec->dups)) {
8852                 tmp = to_extent_record(rec->dups.next);
8853                 list_del_init(&tmp->list);
8854                 free(tmp);
8855         }
8856
8857         btrfs_release_path(&path);
8858
8859         if (!ret && !nr_del)
8860                 rec->num_duplicates = 0;
8861
8862         return ret ? ret : nr_del;
8863 }
8864
8865 static int find_possible_backrefs(struct btrfs_fs_info *info,
8866                                   struct btrfs_path *path,
8867                                   struct cache_tree *extent_cache,
8868                                   struct extent_record *rec)
8869 {
8870         struct btrfs_root *root;
8871         struct extent_backref *back;
8872         struct data_backref *dback;
8873         struct cache_extent *cache;
8874         struct btrfs_file_extent_item *fi;
8875         struct btrfs_key key;
8876         u64 bytenr, bytes;
8877         int ret;
8878
8879         list_for_each_entry(back, &rec->backrefs, list) {
8880                 /* Don't care about full backrefs (poor unloved backrefs) */
8881                 if (back->full_backref || !back->is_data)
8882                         continue;
8883
8884                 dback = to_data_backref(back);
8885
8886                 /* We found this one, we don't need to do a lookup */
8887                 if (dback->found_ref)
8888                         continue;
8889
8890                 key.objectid = dback->root;
8891                 key.type = BTRFS_ROOT_ITEM_KEY;
8892                 key.offset = (u64)-1;
8893
8894                 root = btrfs_read_fs_root(info, &key);
8895
8896                 /* No root, definitely a bad ref, skip */
8897                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8898                         continue;
8899                 /* Other err, exit */
8900                 if (IS_ERR(root))
8901                         return PTR_ERR(root);
8902
8903                 key.objectid = dback->owner;
8904                 key.type = BTRFS_EXTENT_DATA_KEY;
8905                 key.offset = dback->offset;
8906                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8907                 if (ret) {
8908                         btrfs_release_path(path);
8909                         if (ret < 0)
8910                                 return ret;
8911                         /* Didn't find it, we can carry on */
8912                         ret = 0;
8913                         continue;
8914                 }
8915
8916                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8917                                     struct btrfs_file_extent_item);
8918                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8919                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8920                 btrfs_release_path(path);
8921                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8922                 if (cache) {
8923                         struct extent_record *tmp;
8924                         tmp = container_of(cache, struct extent_record, cache);
8925
8926                         /*
8927                          * If we found an extent record for the bytenr for this
8928                          * particular backref then we can't add it to our
8929                          * current extent record.  We only want to add backrefs
8930                          * that don't have a corresponding extent item in the
8931                          * extent tree since they likely belong to this record
8932                          * and we need to fix it if it doesn't match bytenrs.
8933                          */
8934                         if  (tmp->found_rec)
8935                                 continue;
8936                 }
8937
8938                 dback->found_ref += 1;
8939                 dback->disk_bytenr = bytenr;
8940                 dback->bytes = bytes;
8941
8942                 /*
8943                  * Set this so the verify backref code knows not to trust the
8944                  * values in this backref.
8945                  */
8946                 back->broken = 1;
8947         }
8948
8949         return 0;
8950 }
8951
8952 /*
8953  * Record orphan data ref into corresponding root.
8954  *
8955  * Return 0 if the extent item contains data ref and recorded.
8956  * Return 1 if the extent item contains no useful data ref
8957  *   On that case, it may contains only shared_dataref or metadata backref
8958  *   or the file extent exists(this should be handled by the extent bytenr
8959  *   recovery routine)
8960  * Return <0 if something goes wrong.
8961  */
8962 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8963                                       struct extent_record *rec)
8964 {
8965         struct btrfs_key key;
8966         struct btrfs_root *dest_root;
8967         struct extent_backref *back;
8968         struct data_backref *dback;
8969         struct orphan_data_extent *orphan;
8970         struct btrfs_path path;
8971         int recorded_data_ref = 0;
8972         int ret = 0;
8973
8974         if (rec->metadata)
8975                 return 1;
8976         btrfs_init_path(&path);
8977         list_for_each_entry(back, &rec->backrefs, list) {
8978                 if (back->full_backref || !back->is_data ||
8979                     !back->found_extent_tree)
8980                         continue;
8981                 dback = to_data_backref(back);
8982                 if (dback->found_ref)
8983                         continue;
8984                 key.objectid = dback->root;
8985                 key.type = BTRFS_ROOT_ITEM_KEY;
8986                 key.offset = (u64)-1;
8987
8988                 dest_root = btrfs_read_fs_root(fs_info, &key);
8989
8990                 /* For non-exist root we just skip it */
8991                 if (IS_ERR(dest_root) || !dest_root)
8992                         continue;
8993
8994                 key.objectid = dback->owner;
8995                 key.type = BTRFS_EXTENT_DATA_KEY;
8996                 key.offset = dback->offset;
8997
8998                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8999                 btrfs_release_path(&path);
9000                 /*
9001                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9002                  * we need to record it for inode/file extent rebuild.
9003                  * For ret > 0, we record it only for file extent rebuild.
9004                  * For ret == 0, the file extent exists but only bytenr
9005                  * mismatch, let the original bytenr fix routine to handle,
9006                  * don't record it.
9007                  */
9008                 if (ret == 0)
9009                         continue;
9010                 ret = 0;
9011                 orphan = malloc(sizeof(*orphan));
9012                 if (!orphan) {
9013                         ret = -ENOMEM;
9014                         goto out;
9015                 }
9016                 INIT_LIST_HEAD(&orphan->list);
9017                 orphan->root = dback->root;
9018                 orphan->objectid = dback->owner;
9019                 orphan->offset = dback->offset;
9020                 orphan->disk_bytenr = rec->cache.start;
9021                 orphan->disk_len = rec->cache.size;
9022                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9023                 recorded_data_ref = 1;
9024         }
9025 out:
9026         btrfs_release_path(&path);
9027         if (!ret)
9028                 return !recorded_data_ref;
9029         else
9030                 return ret;
9031 }
9032
9033 /*
9034  * when an incorrect extent item is found, this will delete
9035  * all of the existing entries for it and recreate them
9036  * based on what the tree scan found.
9037  */
9038 static int fixup_extent_refs(struct btrfs_fs_info *info,
9039                              struct cache_tree *extent_cache,
9040                              struct extent_record *rec)
9041 {
9042         struct btrfs_trans_handle *trans = NULL;
9043         int ret;
9044         struct btrfs_path path;
9045         struct list_head *cur = rec->backrefs.next;
9046         struct cache_extent *cache;
9047         struct extent_backref *back;
9048         int allocated = 0;
9049         u64 flags = 0;
9050
9051         if (rec->flag_block_full_backref)
9052                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9053
9054         btrfs_init_path(&path);
9055         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9056                 /*
9057                  * Sometimes the backrefs themselves are so broken they don't
9058                  * get attached to any meaningful rec, so first go back and
9059                  * check any of our backrefs that we couldn't find and throw
9060                  * them into the list if we find the backref so that
9061                  * verify_backrefs can figure out what to do.
9062                  */
9063                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9064                 if (ret < 0)
9065                         goto out;
9066         }
9067
9068         /* step one, make sure all of the backrefs agree */
9069         ret = verify_backrefs(info, &path, rec);
9070         if (ret < 0)
9071                 goto out;
9072
9073         trans = btrfs_start_transaction(info->extent_root, 1);
9074         if (IS_ERR(trans)) {
9075                 ret = PTR_ERR(trans);
9076                 goto out;
9077         }
9078
9079         /* step two, delete all the existing records */
9080         ret = delete_extent_records(trans, info->extent_root, &path,
9081                                     rec->start);
9082
9083         if (ret < 0)
9084                 goto out;
9085
9086         /* was this block corrupt?  If so, don't add references to it */
9087         cache = lookup_cache_extent(info->corrupt_blocks,
9088                                     rec->start, rec->max_size);
9089         if (cache) {
9090                 ret = 0;
9091                 goto out;
9092         }
9093
9094         /* step three, recreate all the refs we did find */
9095         while(cur != &rec->backrefs) {
9096                 back = to_extent_backref(cur);
9097                 cur = cur->next;
9098
9099                 /*
9100                  * if we didn't find any references, don't create a
9101                  * new extent record
9102                  */
9103                 if (!back->found_ref)
9104                         continue;
9105
9106                 rec->bad_full_backref = 0;
9107                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9108                 allocated = 1;
9109
9110                 if (ret)
9111                         goto out;
9112         }
9113 out:
9114         if (trans) {
9115                 int err = btrfs_commit_transaction(trans, info->extent_root);
9116                 if (!ret)
9117                         ret = err;
9118         }
9119
9120         if (!ret)
9121                 fprintf(stderr, "Repaired extent references for %llu\n",
9122                                 (unsigned long long)rec->start);
9123
9124         btrfs_release_path(&path);
9125         return ret;
9126 }
9127
9128 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9129                               struct extent_record *rec)
9130 {
9131         struct btrfs_trans_handle *trans;
9132         struct btrfs_root *root = fs_info->extent_root;
9133         struct btrfs_path path;
9134         struct btrfs_extent_item *ei;
9135         struct btrfs_key key;
9136         u64 flags;
9137         int ret = 0;
9138
9139         key.objectid = rec->start;
9140         if (rec->metadata) {
9141                 key.type = BTRFS_METADATA_ITEM_KEY;
9142                 key.offset = rec->info_level;
9143         } else {
9144                 key.type = BTRFS_EXTENT_ITEM_KEY;
9145                 key.offset = rec->max_size;
9146         }
9147
9148         trans = btrfs_start_transaction(root, 0);
9149         if (IS_ERR(trans))
9150                 return PTR_ERR(trans);
9151
9152         btrfs_init_path(&path);
9153         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9154         if (ret < 0) {
9155                 btrfs_release_path(&path);
9156                 btrfs_commit_transaction(trans, root);
9157                 return ret;
9158         } else if (ret) {
9159                 fprintf(stderr, "Didn't find extent for %llu\n",
9160                         (unsigned long long)rec->start);
9161                 btrfs_release_path(&path);
9162                 btrfs_commit_transaction(trans, root);
9163                 return -ENOENT;
9164         }
9165
9166         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9167                             struct btrfs_extent_item);
9168         flags = btrfs_extent_flags(path.nodes[0], ei);
9169         if (rec->flag_block_full_backref) {
9170                 fprintf(stderr, "setting full backref on %llu\n",
9171                         (unsigned long long)key.objectid);
9172                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9173         } else {
9174                 fprintf(stderr, "clearing full backref on %llu\n",
9175                         (unsigned long long)key.objectid);
9176                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9177         }
9178         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9179         btrfs_mark_buffer_dirty(path.nodes[0]);
9180         btrfs_release_path(&path);
9181         ret = btrfs_commit_transaction(trans, root);
9182         if (!ret)
9183                 fprintf(stderr, "Repaired extent flags for %llu\n",
9184                                 (unsigned long long)rec->start);
9185
9186         return ret;
9187 }
9188
9189 /* right now we only prune from the extent allocation tree */
9190 static int prune_one_block(struct btrfs_trans_handle *trans,
9191                            struct btrfs_fs_info *info,
9192                            struct btrfs_corrupt_block *corrupt)
9193 {
9194         int ret;
9195         struct btrfs_path path;
9196         struct extent_buffer *eb;
9197         u64 found;
9198         int slot;
9199         int nritems;
9200         int level = corrupt->level + 1;
9201
9202         btrfs_init_path(&path);
9203 again:
9204         /* we want to stop at the parent to our busted block */
9205         path.lowest_level = level;
9206
9207         ret = btrfs_search_slot(trans, info->extent_root,
9208                                 &corrupt->key, &path, -1, 1);
9209
9210         if (ret < 0)
9211                 goto out;
9212
9213         eb = path.nodes[level];
9214         if (!eb) {
9215                 ret = -ENOENT;
9216                 goto out;
9217         }
9218
9219         /*
9220          * hopefully the search gave us the block we want to prune,
9221          * lets try that first
9222          */
9223         slot = path.slots[level];
9224         found =  btrfs_node_blockptr(eb, slot);
9225         if (found == corrupt->cache.start)
9226                 goto del_ptr;
9227
9228         nritems = btrfs_header_nritems(eb);
9229
9230         /* the search failed, lets scan this node and hope we find it */
9231         for (slot = 0; slot < nritems; slot++) {
9232                 found =  btrfs_node_blockptr(eb, slot);
9233                 if (found == corrupt->cache.start)
9234                         goto del_ptr;
9235         }
9236         /*
9237          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9238          * to this block
9239          */
9240         if (eb == info->extent_root->node) {
9241                 ret = -ENOENT;
9242                 goto out;
9243         } else {
9244                 level++;
9245                 btrfs_release_path(&path);
9246                 goto again;
9247         }
9248
9249 del_ptr:
9250         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9251         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9252
9253 out:
9254         btrfs_release_path(&path);
9255         return ret;
9256 }
9257
9258 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9259 {
9260         struct btrfs_trans_handle *trans = NULL;
9261         struct cache_extent *cache;
9262         struct btrfs_corrupt_block *corrupt;
9263
9264         while (1) {
9265                 cache = search_cache_extent(info->corrupt_blocks, 0);
9266                 if (!cache)
9267                         break;
9268                 if (!trans) {
9269                         trans = btrfs_start_transaction(info->extent_root, 1);
9270                         if (IS_ERR(trans))
9271                                 return PTR_ERR(trans);
9272                 }
9273                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9274                 prune_one_block(trans, info, corrupt);
9275                 remove_cache_extent(info->corrupt_blocks, cache);
9276         }
9277         if (trans)
9278                 return btrfs_commit_transaction(trans, info->extent_root);
9279         return 0;
9280 }
9281
9282 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9283 {
9284         struct btrfs_block_group_cache *cache;
9285         u64 start, end;
9286         int ret;
9287
9288         while (1) {
9289                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9290                                             &start, &end, EXTENT_DIRTY);
9291                 if (ret)
9292                         break;
9293                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9294         }
9295
9296         start = 0;
9297         while (1) {
9298                 cache = btrfs_lookup_first_block_group(fs_info, start);
9299                 if (!cache)
9300                         break;
9301                 if (cache->cached)
9302                         cache->cached = 0;
9303                 start = cache->key.objectid + cache->key.offset;
9304         }
9305 }
9306
9307 static int check_extent_refs(struct btrfs_root *root,
9308                              struct cache_tree *extent_cache)
9309 {
9310         struct extent_record *rec;
9311         struct cache_extent *cache;
9312         int ret = 0;
9313         int had_dups = 0;
9314
9315         if (repair) {
9316                 /*
9317                  * if we're doing a repair, we have to make sure
9318                  * we don't allocate from the problem extents.
9319                  * In the worst case, this will be all the
9320                  * extents in the FS
9321                  */
9322                 cache = search_cache_extent(extent_cache, 0);
9323                 while(cache) {
9324                         rec = container_of(cache, struct extent_record, cache);
9325                         set_extent_dirty(root->fs_info->excluded_extents,
9326                                          rec->start,
9327                                          rec->start + rec->max_size - 1);
9328                         cache = next_cache_extent(cache);
9329                 }
9330
9331                 /* pin down all the corrupted blocks too */
9332                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9333                 while(cache) {
9334                         set_extent_dirty(root->fs_info->excluded_extents,
9335                                          cache->start,
9336                                          cache->start + cache->size - 1);
9337                         cache = next_cache_extent(cache);
9338                 }
9339                 prune_corrupt_blocks(root->fs_info);
9340                 reset_cached_block_groups(root->fs_info);
9341         }
9342
9343         reset_cached_block_groups(root->fs_info);
9344
9345         /*
9346          * We need to delete any duplicate entries we find first otherwise we
9347          * could mess up the extent tree when we have backrefs that actually
9348          * belong to a different extent item and not the weird duplicate one.
9349          */
9350         while (repair && !list_empty(&duplicate_extents)) {
9351                 rec = to_extent_record(duplicate_extents.next);
9352                 list_del_init(&rec->list);
9353
9354                 /* Sometimes we can find a backref before we find an actual
9355                  * extent, so we need to process it a little bit to see if there
9356                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9357                  * if this is a backref screwup.  If we need to delete stuff
9358                  * process_duplicates() will return 0, otherwise it will return
9359                  * 1 and we
9360                  */
9361                 if (process_duplicates(extent_cache, rec))
9362                         continue;
9363                 ret = delete_duplicate_records(root, rec);
9364                 if (ret < 0)
9365                         return ret;
9366                 /*
9367                  * delete_duplicate_records will return the number of entries
9368                  * deleted, so if it's greater than 0 then we know we actually
9369                  * did something and we need to remove.
9370                  */
9371                 if (ret)
9372                         had_dups = 1;
9373         }
9374
9375         if (had_dups)
9376                 return -EAGAIN;
9377
9378         while(1) {
9379                 int cur_err = 0;
9380                 int fix = 0;
9381
9382                 cache = search_cache_extent(extent_cache, 0);
9383                 if (!cache)
9384                         break;
9385                 rec = container_of(cache, struct extent_record, cache);
9386                 if (rec->num_duplicates) {
9387                         fprintf(stderr, "extent item %llu has multiple extent "
9388                                 "items\n", (unsigned long long)rec->start);
9389                         cur_err = 1;
9390                 }
9391
9392                 if (rec->refs != rec->extent_item_refs) {
9393                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9394                                 (unsigned long long)rec->start,
9395                                 (unsigned long long)rec->nr);
9396                         fprintf(stderr, "extent item %llu, found %llu\n",
9397                                 (unsigned long long)rec->extent_item_refs,
9398                                 (unsigned long long)rec->refs);
9399                         ret = record_orphan_data_extents(root->fs_info, rec);
9400                         if (ret < 0)
9401                                 goto repair_abort;
9402                         fix = ret;
9403                         cur_err = 1;
9404                 }
9405                 if (all_backpointers_checked(rec, 1)) {
9406                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9407                                 (unsigned long long)rec->start,
9408                                 (unsigned long long)rec->nr);
9409                         fix = 1;
9410                         cur_err = 1;
9411                 }
9412                 if (!rec->owner_ref_checked) {
9413                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9414                                 (unsigned long long)rec->start,
9415                                 (unsigned long long)rec->nr);
9416                         fix = 1;
9417                         cur_err = 1;
9418                 }
9419
9420                 if (repair && fix) {
9421                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9422                         if (ret)
9423                                 goto repair_abort;
9424                 }
9425
9426
9427                 if (rec->bad_full_backref) {
9428                         fprintf(stderr, "bad full backref, on [%llu]\n",
9429                                 (unsigned long long)rec->start);
9430                         if (repair) {
9431                                 ret = fixup_extent_flags(root->fs_info, rec);
9432                                 if (ret)
9433                                         goto repair_abort;
9434                                 fix = 1;
9435                         }
9436                         cur_err = 1;
9437                 }
9438                 /*
9439                  * Although it's not a extent ref's problem, we reuse this
9440                  * routine for error reporting.
9441                  * No repair function yet.
9442                  */
9443                 if (rec->crossing_stripes) {
9444                         fprintf(stderr,
9445                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9446                                 rec->start, rec->start + rec->max_size);
9447                         cur_err = 1;
9448                 }
9449
9450                 if (rec->wrong_chunk_type) {
9451                         fprintf(stderr,
9452                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9453                                 rec->start, rec->start + rec->max_size);
9454                         cur_err = 1;
9455                 }
9456
9457                 remove_cache_extent(extent_cache, cache);
9458                 free_all_extent_backrefs(rec);
9459                 if (!init_extent_tree && repair && (!cur_err || fix))
9460                         clear_extent_dirty(root->fs_info->excluded_extents,
9461                                            rec->start,
9462                                            rec->start + rec->max_size - 1);
9463                 free(rec);
9464         }
9465 repair_abort:
9466         if (repair) {
9467                 if (ret && ret != -EAGAIN) {
9468                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9469                         exit(1);
9470                 } else if (!ret) {
9471                         struct btrfs_trans_handle *trans;
9472
9473                         root = root->fs_info->extent_root;
9474                         trans = btrfs_start_transaction(root, 1);
9475                         if (IS_ERR(trans)) {
9476                                 ret = PTR_ERR(trans);
9477                                 goto repair_abort;
9478                         }
9479
9480                         btrfs_fix_block_accounting(trans, root);
9481                         ret = btrfs_commit_transaction(trans, root);
9482                         if (ret)
9483                                 goto repair_abort;
9484                 }
9485                 return ret;
9486         }
9487         return 0;
9488 }
9489
9490 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9491 {
9492         u64 stripe_size;
9493
9494         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9495                 stripe_size = length;
9496                 stripe_size /= num_stripes;
9497         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9498                 stripe_size = length * 2;
9499                 stripe_size /= num_stripes;
9500         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9501                 stripe_size = length;
9502                 stripe_size /= (num_stripes - 1);
9503         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9504                 stripe_size = length;
9505                 stripe_size /= (num_stripes - 2);
9506         } else {
9507                 stripe_size = length;
9508         }
9509         return stripe_size;
9510 }
9511
9512 /*
9513  * Check the chunk with its block group/dev list ref:
9514  * Return 0 if all refs seems valid.
9515  * Return 1 if part of refs seems valid, need later check for rebuild ref
9516  * like missing block group and needs to search extent tree to rebuild them.
9517  * Return -1 if essential refs are missing and unable to rebuild.
9518  */
9519 static int check_chunk_refs(struct chunk_record *chunk_rec,
9520                             struct block_group_tree *block_group_cache,
9521                             struct device_extent_tree *dev_extent_cache,
9522                             int silent)
9523 {
9524         struct cache_extent *block_group_item;
9525         struct block_group_record *block_group_rec;
9526         struct cache_extent *dev_extent_item;
9527         struct device_extent_record *dev_extent_rec;
9528         u64 devid;
9529         u64 offset;
9530         u64 length;
9531         int metadump_v2 = 0;
9532         int i;
9533         int ret = 0;
9534
9535         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9536                                                chunk_rec->offset,
9537                                                chunk_rec->length);
9538         if (block_group_item) {
9539                 block_group_rec = container_of(block_group_item,
9540                                                struct block_group_record,
9541                                                cache);
9542                 if (chunk_rec->length != block_group_rec->offset ||
9543                     chunk_rec->offset != block_group_rec->objectid ||
9544                     (!metadump_v2 &&
9545                      chunk_rec->type_flags != block_group_rec->flags)) {
9546                         if (!silent)
9547                                 fprintf(stderr,
9548                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9549                                         chunk_rec->objectid,
9550                                         chunk_rec->type,
9551                                         chunk_rec->offset,
9552                                         chunk_rec->length,
9553                                         chunk_rec->offset,
9554                                         chunk_rec->type_flags,
9555                                         block_group_rec->objectid,
9556                                         block_group_rec->type,
9557                                         block_group_rec->offset,
9558                                         block_group_rec->offset,
9559                                         block_group_rec->objectid,
9560                                         block_group_rec->flags);
9561                         ret = -1;
9562                 } else {
9563                         list_del_init(&block_group_rec->list);
9564                         chunk_rec->bg_rec = block_group_rec;
9565                 }
9566         } else {
9567                 if (!silent)
9568                         fprintf(stderr,
9569                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9570                                 chunk_rec->objectid,
9571                                 chunk_rec->type,
9572                                 chunk_rec->offset,
9573                                 chunk_rec->length,
9574                                 chunk_rec->offset,
9575                                 chunk_rec->type_flags);
9576                 ret = 1;
9577         }
9578
9579         if (metadump_v2)
9580                 return ret;
9581
9582         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9583                                     chunk_rec->num_stripes);
9584         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9585                 devid = chunk_rec->stripes[i].devid;
9586                 offset = chunk_rec->stripes[i].offset;
9587                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9588                                                        devid, offset, length);
9589                 if (dev_extent_item) {
9590                         dev_extent_rec = container_of(dev_extent_item,
9591                                                 struct device_extent_record,
9592                                                 cache);
9593                         if (dev_extent_rec->objectid != devid ||
9594                             dev_extent_rec->offset != offset ||
9595                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9596                             dev_extent_rec->length != length) {
9597                                 if (!silent)
9598                                         fprintf(stderr,
9599                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9600                                                 chunk_rec->objectid,
9601                                                 chunk_rec->type,
9602                                                 chunk_rec->offset,
9603                                                 chunk_rec->stripes[i].devid,
9604                                                 chunk_rec->stripes[i].offset,
9605                                                 dev_extent_rec->objectid,
9606                                                 dev_extent_rec->offset,
9607                                                 dev_extent_rec->length);
9608                                 ret = -1;
9609                         } else {
9610                                 list_move(&dev_extent_rec->chunk_list,
9611                                           &chunk_rec->dextents);
9612                         }
9613                 } else {
9614                         if (!silent)
9615                                 fprintf(stderr,
9616                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9617                                         chunk_rec->objectid,
9618                                         chunk_rec->type,
9619                                         chunk_rec->offset,
9620                                         chunk_rec->stripes[i].devid,
9621                                         chunk_rec->stripes[i].offset);
9622                         ret = -1;
9623                 }
9624         }
9625         return ret;
9626 }
9627
9628 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9629 int check_chunks(struct cache_tree *chunk_cache,
9630                  struct block_group_tree *block_group_cache,
9631                  struct device_extent_tree *dev_extent_cache,
9632                  struct list_head *good, struct list_head *bad,
9633                  struct list_head *rebuild, int silent)
9634 {
9635         struct cache_extent *chunk_item;
9636         struct chunk_record *chunk_rec;
9637         struct block_group_record *bg_rec;
9638         struct device_extent_record *dext_rec;
9639         int err;
9640         int ret = 0;
9641
9642         chunk_item = first_cache_extent(chunk_cache);
9643         while (chunk_item) {
9644                 chunk_rec = container_of(chunk_item, struct chunk_record,
9645                                          cache);
9646                 err = check_chunk_refs(chunk_rec, block_group_cache,
9647                                        dev_extent_cache, silent);
9648                 if (err < 0)
9649                         ret = err;
9650                 if (err == 0 && good)
9651                         list_add_tail(&chunk_rec->list, good);
9652                 if (err > 0 && rebuild)
9653                         list_add_tail(&chunk_rec->list, rebuild);
9654                 if (err < 0 && bad)
9655                         list_add_tail(&chunk_rec->list, bad);
9656                 chunk_item = next_cache_extent(chunk_item);
9657         }
9658
9659         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9660                 if (!silent)
9661                         fprintf(stderr,
9662                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9663                                 bg_rec->objectid,
9664                                 bg_rec->offset,
9665                                 bg_rec->flags);
9666                 if (!ret)
9667                         ret = 1;
9668         }
9669
9670         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9671                             chunk_list) {
9672                 if (!silent)
9673                         fprintf(stderr,
9674                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9675                                 dext_rec->objectid,
9676                                 dext_rec->offset,
9677                                 dext_rec->length);
9678                 if (!ret)
9679                         ret = 1;
9680         }
9681         return ret;
9682 }
9683
9684
9685 static int check_device_used(struct device_record *dev_rec,
9686                              struct device_extent_tree *dext_cache)
9687 {
9688         struct cache_extent *cache;
9689         struct device_extent_record *dev_extent_rec;
9690         u64 total_byte = 0;
9691
9692         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9693         while (cache) {
9694                 dev_extent_rec = container_of(cache,
9695                                               struct device_extent_record,
9696                                               cache);
9697                 if (dev_extent_rec->objectid != dev_rec->devid)
9698                         break;
9699
9700                 list_del_init(&dev_extent_rec->device_list);
9701                 total_byte += dev_extent_rec->length;
9702                 cache = next_cache_extent(cache);
9703         }
9704
9705         if (total_byte != dev_rec->byte_used) {
9706                 fprintf(stderr,
9707                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9708                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9709                         dev_rec->type, dev_rec->offset);
9710                 return -1;
9711         } else {
9712                 return 0;
9713         }
9714 }
9715
9716 /* check btrfs_dev_item -> btrfs_dev_extent */
9717 static int check_devices(struct rb_root *dev_cache,
9718                          struct device_extent_tree *dev_extent_cache)
9719 {
9720         struct rb_node *dev_node;
9721         struct device_record *dev_rec;
9722         struct device_extent_record *dext_rec;
9723         int err;
9724         int ret = 0;
9725
9726         dev_node = rb_first(dev_cache);
9727         while (dev_node) {
9728                 dev_rec = container_of(dev_node, struct device_record, node);
9729                 err = check_device_used(dev_rec, dev_extent_cache);
9730                 if (err)
9731                         ret = err;
9732
9733                 dev_node = rb_next(dev_node);
9734         }
9735         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9736                             device_list) {
9737                 fprintf(stderr,
9738                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9739                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9740                 if (!ret)
9741                         ret = 1;
9742         }
9743         return ret;
9744 }
9745
9746 static int add_root_item_to_list(struct list_head *head,
9747                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9748                                   u8 level, u8 drop_level,
9749                                   int level_size, struct btrfs_key *drop_key)
9750 {
9751
9752         struct root_item_record *ri_rec;
9753         ri_rec = malloc(sizeof(*ri_rec));
9754         if (!ri_rec)
9755                 return -ENOMEM;
9756         ri_rec->bytenr = bytenr;
9757         ri_rec->objectid = objectid;
9758         ri_rec->level = level;
9759         ri_rec->level_size = level_size;
9760         ri_rec->drop_level = drop_level;
9761         ri_rec->last_snapshot = last_snapshot;
9762         if (drop_key)
9763                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9764         list_add_tail(&ri_rec->list, head);
9765
9766         return 0;
9767 }
9768
9769 static void free_root_item_list(struct list_head *list)
9770 {
9771         struct root_item_record *ri_rec;
9772
9773         while (!list_empty(list)) {
9774                 ri_rec = list_first_entry(list, struct root_item_record,
9775                                           list);
9776                 list_del_init(&ri_rec->list);
9777                 free(ri_rec);
9778         }
9779 }
9780
9781 static int deal_root_from_list(struct list_head *list,
9782                                struct btrfs_root *root,
9783                                struct block_info *bits,
9784                                int bits_nr,
9785                                struct cache_tree *pending,
9786                                struct cache_tree *seen,
9787                                struct cache_tree *reada,
9788                                struct cache_tree *nodes,
9789                                struct cache_tree *extent_cache,
9790                                struct cache_tree *chunk_cache,
9791                                struct rb_root *dev_cache,
9792                                struct block_group_tree *block_group_cache,
9793                                struct device_extent_tree *dev_extent_cache)
9794 {
9795         int ret = 0;
9796         u64 last;
9797
9798         while (!list_empty(list)) {
9799                 struct root_item_record *rec;
9800                 struct extent_buffer *buf;
9801                 rec = list_entry(list->next,
9802                                  struct root_item_record, list);
9803                 last = 0;
9804                 buf = read_tree_block(root->fs_info,
9805                                       rec->bytenr, rec->level_size, 0);
9806                 if (!extent_buffer_uptodate(buf)) {
9807                         free_extent_buffer(buf);
9808                         ret = -EIO;
9809                         break;
9810                 }
9811                 ret = add_root_to_pending(buf, extent_cache, pending,
9812                                     seen, nodes, rec->objectid);
9813                 if (ret < 0)
9814                         break;
9815                 /*
9816                  * To rebuild extent tree, we need deal with snapshot
9817                  * one by one, otherwise we deal with node firstly which
9818                  * can maximize readahead.
9819                  */
9820                 while (1) {
9821                         ret = run_next_block(root, bits, bits_nr, &last,
9822                                              pending, seen, reada, nodes,
9823                                              extent_cache, chunk_cache,
9824                                              dev_cache, block_group_cache,
9825                                              dev_extent_cache, rec);
9826                         if (ret != 0)
9827                                 break;
9828                 }
9829                 free_extent_buffer(buf);
9830                 list_del(&rec->list);
9831                 free(rec);
9832                 if (ret < 0)
9833                         break;
9834         }
9835         while (ret >= 0) {
9836                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9837                                      reada, nodes, extent_cache, chunk_cache,
9838                                      dev_cache, block_group_cache,
9839                                      dev_extent_cache, NULL);
9840                 if (ret != 0) {
9841                         if (ret > 0)
9842                                 ret = 0;
9843                         break;
9844                 }
9845         }
9846         return ret;
9847 }
9848
9849 static int check_chunks_and_extents(struct btrfs_root *root)
9850 {
9851         struct rb_root dev_cache;
9852         struct cache_tree chunk_cache;
9853         struct block_group_tree block_group_cache;
9854         struct device_extent_tree dev_extent_cache;
9855         struct cache_tree extent_cache;
9856         struct cache_tree seen;
9857         struct cache_tree pending;
9858         struct cache_tree reada;
9859         struct cache_tree nodes;
9860         struct extent_io_tree excluded_extents;
9861         struct cache_tree corrupt_blocks;
9862         struct btrfs_path path;
9863         struct btrfs_key key;
9864         struct btrfs_key found_key;
9865         int ret, err = 0;
9866         struct block_info *bits;
9867         int bits_nr;
9868         struct extent_buffer *leaf;
9869         int slot;
9870         struct btrfs_root_item ri;
9871         struct list_head dropping_trees;
9872         struct list_head normal_trees;
9873         struct btrfs_root *root1;
9874         u64 objectid;
9875         u32 level_size;
9876         u8 level;
9877
9878         dev_cache = RB_ROOT;
9879         cache_tree_init(&chunk_cache);
9880         block_group_tree_init(&block_group_cache);
9881         device_extent_tree_init(&dev_extent_cache);
9882
9883         cache_tree_init(&extent_cache);
9884         cache_tree_init(&seen);
9885         cache_tree_init(&pending);
9886         cache_tree_init(&nodes);
9887         cache_tree_init(&reada);
9888         cache_tree_init(&corrupt_blocks);
9889         extent_io_tree_init(&excluded_extents);
9890         INIT_LIST_HEAD(&dropping_trees);
9891         INIT_LIST_HEAD(&normal_trees);
9892
9893         if (repair) {
9894                 root->fs_info->excluded_extents = &excluded_extents;
9895                 root->fs_info->fsck_extent_cache = &extent_cache;
9896                 root->fs_info->free_extent_hook = free_extent_hook;
9897                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9898         }
9899
9900         bits_nr = 1024;
9901         bits = malloc(bits_nr * sizeof(struct block_info));
9902         if (!bits) {
9903                 perror("malloc");
9904                 exit(1);
9905         }
9906
9907         if (ctx.progress_enabled) {
9908                 ctx.tp = TASK_EXTENTS;
9909                 task_start(ctx.info);
9910         }
9911
9912 again:
9913         root1 = root->fs_info->tree_root;
9914         level = btrfs_header_level(root1->node);
9915         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9916                                     root1->node->start, 0, level, 0,
9917                                     root1->fs_info->nodesize, NULL);
9918         if (ret < 0)
9919                 goto out;
9920         root1 = root->fs_info->chunk_root;
9921         level = btrfs_header_level(root1->node);
9922         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9923                                     root1->node->start, 0, level, 0,
9924                                     root1->fs_info->nodesize, NULL);
9925         if (ret < 0)
9926                 goto out;
9927         btrfs_init_path(&path);
9928         key.offset = 0;
9929         key.objectid = 0;
9930         key.type = BTRFS_ROOT_ITEM_KEY;
9931         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9932                                         &key, &path, 0, 0);
9933         if (ret < 0)
9934                 goto out;
9935         while(1) {
9936                 leaf = path.nodes[0];
9937                 slot = path.slots[0];
9938                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9939                         ret = btrfs_next_leaf(root, &path);
9940                         if (ret != 0)
9941                                 break;
9942                         leaf = path.nodes[0];
9943                         slot = path.slots[0];
9944                 }
9945                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9946                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9947                         unsigned long offset;
9948                         u64 last_snapshot;
9949
9950                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9951                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9952                         last_snapshot = btrfs_root_last_snapshot(&ri);
9953                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9954                                 level = btrfs_root_level(&ri);
9955                                 level_size = root->fs_info->nodesize;
9956                                 ret = add_root_item_to_list(&normal_trees,
9957                                                 found_key.objectid,
9958                                                 btrfs_root_bytenr(&ri),
9959                                                 last_snapshot, level,
9960                                                 0, level_size, NULL);
9961                                 if (ret < 0)
9962                                         goto out;
9963                         } else {
9964                                 level = btrfs_root_level(&ri);
9965                                 level_size = root->fs_info->nodesize;
9966                                 objectid = found_key.objectid;
9967                                 btrfs_disk_key_to_cpu(&found_key,
9968                                                       &ri.drop_progress);
9969                                 ret = add_root_item_to_list(&dropping_trees,
9970                                                 objectid,
9971                                                 btrfs_root_bytenr(&ri),
9972                                                 last_snapshot, level,
9973                                                 ri.drop_level,
9974                                                 level_size, &found_key);
9975                                 if (ret < 0)
9976                                         goto out;
9977                         }
9978                 }
9979                 path.slots[0]++;
9980         }
9981         btrfs_release_path(&path);
9982
9983         /*
9984          * check_block can return -EAGAIN if it fixes something, please keep
9985          * this in mind when dealing with return values from these functions, if
9986          * we get -EAGAIN we want to fall through and restart the loop.
9987          */
9988         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9989                                   &seen, &reada, &nodes, &extent_cache,
9990                                   &chunk_cache, &dev_cache, &block_group_cache,
9991                                   &dev_extent_cache);
9992         if (ret < 0) {
9993                 if (ret == -EAGAIN)
9994                         goto loop;
9995                 goto out;
9996         }
9997         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9998                                   &pending, &seen, &reada, &nodes,
9999                                   &extent_cache, &chunk_cache, &dev_cache,
10000                                   &block_group_cache, &dev_extent_cache);
10001         if (ret < 0) {
10002                 if (ret == -EAGAIN)
10003                         goto loop;
10004                 goto out;
10005         }
10006
10007         ret = check_chunks(&chunk_cache, &block_group_cache,
10008                            &dev_extent_cache, NULL, NULL, NULL, 0);
10009         if (ret) {
10010                 if (ret == -EAGAIN)
10011                         goto loop;
10012                 err = ret;
10013         }
10014
10015         ret = check_extent_refs(root, &extent_cache);
10016         if (ret < 0) {
10017                 if (ret == -EAGAIN)
10018                         goto loop;
10019                 goto out;
10020         }
10021
10022         ret = check_devices(&dev_cache, &dev_extent_cache);
10023         if (ret && err)
10024                 ret = err;
10025
10026 out:
10027         task_stop(ctx.info);
10028         if (repair) {
10029                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10030                 extent_io_tree_cleanup(&excluded_extents);
10031                 root->fs_info->fsck_extent_cache = NULL;
10032                 root->fs_info->free_extent_hook = NULL;
10033                 root->fs_info->corrupt_blocks = NULL;
10034                 root->fs_info->excluded_extents = NULL;
10035         }
10036         free(bits);
10037         free_chunk_cache_tree(&chunk_cache);
10038         free_device_cache_tree(&dev_cache);
10039         free_block_group_tree(&block_group_cache);
10040         free_device_extent_tree(&dev_extent_cache);
10041         free_extent_cache_tree(&seen);
10042         free_extent_cache_tree(&pending);
10043         free_extent_cache_tree(&reada);
10044         free_extent_cache_tree(&nodes);
10045         free_root_item_list(&normal_trees);
10046         free_root_item_list(&dropping_trees);
10047         return ret;
10048 loop:
10049         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10050         free_extent_cache_tree(&seen);
10051         free_extent_cache_tree(&pending);
10052         free_extent_cache_tree(&reada);
10053         free_extent_cache_tree(&nodes);
10054         free_chunk_cache_tree(&chunk_cache);
10055         free_block_group_tree(&block_group_cache);
10056         free_device_cache_tree(&dev_cache);
10057         free_device_extent_tree(&dev_extent_cache);
10058         free_extent_record_cache(&extent_cache);
10059         free_root_item_list(&normal_trees);
10060         free_root_item_list(&dropping_trees);
10061         extent_io_tree_cleanup(&excluded_extents);
10062         goto again;
10063 }
10064
10065 /*
10066  * Check backrefs of a tree block given by @bytenr or @eb.
10067  *
10068  * @root:       the root containing the @bytenr or @eb
10069  * @eb:         tree block extent buffer, can be NULL
10070  * @bytenr:     bytenr of the tree block to search
10071  * @level:      tree level of the tree block
10072  * @owner:      owner of the tree block
10073  *
10074  * Return >0 for any error found and output error message
10075  * Return 0 for no error found
10076  */
10077 static int check_tree_block_ref(struct btrfs_root *root,
10078                                 struct extent_buffer *eb, u64 bytenr,
10079                                 int level, u64 owner)
10080 {
10081         struct btrfs_key key;
10082         struct btrfs_root *extent_root = root->fs_info->extent_root;
10083         struct btrfs_path path;
10084         struct btrfs_extent_item *ei;
10085         struct btrfs_extent_inline_ref *iref;
10086         struct extent_buffer *leaf;
10087         unsigned long end;
10088         unsigned long ptr;
10089         int slot;
10090         int skinny_level;
10091         int type;
10092         u32 nodesize = root->fs_info->nodesize;
10093         u32 item_size;
10094         u64 offset;
10095         int tree_reloc_root = 0;
10096         int found_ref = 0;
10097         int err = 0;
10098         int ret;
10099
10100         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10101             btrfs_header_bytenr(root->node) == bytenr)
10102                 tree_reloc_root = 1;
10103
10104         btrfs_init_path(&path);
10105         key.objectid = bytenr;
10106         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10107                 key.type = BTRFS_METADATA_ITEM_KEY;
10108         else
10109                 key.type = BTRFS_EXTENT_ITEM_KEY;
10110         key.offset = (u64)-1;
10111
10112         /* Search for the backref in extent tree */
10113         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10114         if (ret < 0) {
10115                 err |= BACKREF_MISSING;
10116                 goto out;
10117         }
10118         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10119         if (ret) {
10120                 err |= BACKREF_MISSING;
10121                 goto out;
10122         }
10123
10124         leaf = path.nodes[0];
10125         slot = path.slots[0];
10126         btrfs_item_key_to_cpu(leaf, &key, slot);
10127
10128         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10129
10130         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10131                 skinny_level = (int)key.offset;
10132                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10133         } else {
10134                 struct btrfs_tree_block_info *info;
10135
10136                 info = (struct btrfs_tree_block_info *)(ei + 1);
10137                 skinny_level = btrfs_tree_block_level(leaf, info);
10138                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10139         }
10140
10141         if (eb) {
10142                 u64 header_gen;
10143                 u64 extent_gen;
10144
10145                 if (!(btrfs_extent_flags(leaf, ei) &
10146                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10147                         error(
10148                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10149                                 key.objectid, nodesize,
10150                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10151                         err = BACKREF_MISMATCH;
10152                 }
10153                 header_gen = btrfs_header_generation(eb);
10154                 extent_gen = btrfs_extent_generation(leaf, ei);
10155                 if (header_gen != extent_gen) {
10156                         error(
10157         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10158                                 key.objectid, nodesize, header_gen,
10159                                 extent_gen);
10160                         err = BACKREF_MISMATCH;
10161                 }
10162                 if (level != skinny_level) {
10163                         error(
10164                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10165                                 key.objectid, nodesize, level, skinny_level);
10166                         err = BACKREF_MISMATCH;
10167                 }
10168                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10169                         error(
10170                         "extent[%llu %u] is referred by other roots than %llu",
10171                                 key.objectid, nodesize, root->objectid);
10172                         err = BACKREF_MISMATCH;
10173                 }
10174         }
10175
10176         /*
10177          * Iterate the extent/metadata item to find the exact backref
10178          */
10179         item_size = btrfs_item_size_nr(leaf, slot);
10180         ptr = (unsigned long)iref;
10181         end = (unsigned long)ei + item_size;
10182         while (ptr < end) {
10183                 iref = (struct btrfs_extent_inline_ref *)ptr;
10184                 type = btrfs_extent_inline_ref_type(leaf, iref);
10185                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10186
10187                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10188                         (offset == root->objectid || offset == owner)) {
10189                         found_ref = 1;
10190                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10191                         /*
10192                          * Backref of tree reloc root points to itself, no need
10193                          * to check backref any more.
10194                          */
10195                         if (tree_reloc_root)
10196                                 found_ref = 1;
10197                         else
10198                         /* Check if the backref points to valid referencer */
10199                                 found_ref = !check_tree_block_ref(root, NULL,
10200                                                 offset, level + 1, owner);
10201                 }
10202
10203                 if (found_ref)
10204                         break;
10205                 ptr += btrfs_extent_inline_ref_size(type);
10206         }
10207
10208         /*
10209          * Inlined extent item doesn't have what we need, check
10210          * TREE_BLOCK_REF_KEY
10211          */
10212         if (!found_ref) {
10213                 btrfs_release_path(&path);
10214                 key.objectid = bytenr;
10215                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10216                 key.offset = root->objectid;
10217
10218                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10219                 if (!ret)
10220                         found_ref = 1;
10221         }
10222         if (!found_ref)
10223                 err |= BACKREF_MISSING;
10224 out:
10225         btrfs_release_path(&path);
10226         if (eb && (err & BACKREF_MISSING))
10227                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10228                         bytenr, nodesize, owner, level);
10229         return err;
10230 }
10231
10232 /*
10233  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10234  *
10235  * Return >0 any error found and output error message
10236  * Return 0 for no error found
10237  */
10238 static int check_extent_data_item(struct btrfs_root *root,
10239                                   struct extent_buffer *eb, int slot)
10240 {
10241         struct btrfs_file_extent_item *fi;
10242         struct btrfs_path path;
10243         struct btrfs_root *extent_root = root->fs_info->extent_root;
10244         struct btrfs_key fi_key;
10245         struct btrfs_key dbref_key;
10246         struct extent_buffer *leaf;
10247         struct btrfs_extent_item *ei;
10248         struct btrfs_extent_inline_ref *iref;
10249         struct btrfs_extent_data_ref *dref;
10250         u64 owner;
10251         u64 disk_bytenr;
10252         u64 disk_num_bytes;
10253         u64 extent_num_bytes;
10254         u64 extent_flags;
10255         u32 item_size;
10256         unsigned long end;
10257         unsigned long ptr;
10258         int type;
10259         u64 ref_root;
10260         int found_dbackref = 0;
10261         int err = 0;
10262         int ret;
10263
10264         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10265         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10266
10267         /* Nothing to check for hole and inline data extents */
10268         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10269             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10270                 return 0;
10271
10272         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10273         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10274         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10275
10276         /* Check unaligned disk_num_bytes and num_bytes */
10277         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10278                 error(
10279 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10280                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10281                         root->fs_info->sectorsize);
10282                 err |= BYTES_UNALIGNED;
10283         } else {
10284                 data_bytes_allocated += disk_num_bytes;
10285         }
10286         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10287                 error(
10288 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10289                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10290                         root->fs_info->sectorsize);
10291                 err |= BYTES_UNALIGNED;
10292         } else {
10293                 data_bytes_referenced += extent_num_bytes;
10294         }
10295         owner = btrfs_header_owner(eb);
10296
10297         /* Check the extent item of the file extent in extent tree */
10298         btrfs_init_path(&path);
10299         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10300         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10301         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10302
10303         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10304         if (ret)
10305                 goto out;
10306
10307         leaf = path.nodes[0];
10308         slot = path.slots[0];
10309         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10310
10311         extent_flags = btrfs_extent_flags(leaf, ei);
10312
10313         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10314                 error(
10315                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10316                     disk_bytenr, disk_num_bytes,
10317                     BTRFS_EXTENT_FLAG_DATA);
10318                 err |= BACKREF_MISMATCH;
10319         }
10320
10321         /* Check data backref inside that extent item */
10322         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10323         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10324         ptr = (unsigned long)iref;
10325         end = (unsigned long)ei + item_size;
10326         while (ptr < end) {
10327                 iref = (struct btrfs_extent_inline_ref *)ptr;
10328                 type = btrfs_extent_inline_ref_type(leaf, iref);
10329                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10330
10331                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10332                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10333                         if (ref_root == owner || ref_root == root->objectid)
10334                                 found_dbackref = 1;
10335                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10336                         found_dbackref = !check_tree_block_ref(root, NULL,
10337                                 btrfs_extent_inline_ref_offset(leaf, iref),
10338                                 0, owner);
10339                 }
10340
10341                 if (found_dbackref)
10342                         break;
10343                 ptr += btrfs_extent_inline_ref_size(type);
10344         }
10345
10346         if (!found_dbackref) {
10347                 btrfs_release_path(&path);
10348
10349                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10350                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10351                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10352                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10353                                 fi_key.objectid, fi_key.offset);
10354
10355                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10356                                         &dbref_key, &path, 0, 0);
10357                 if (!ret) {
10358                         found_dbackref = 1;
10359                         goto out;
10360                 }
10361
10362                 btrfs_release_path(&path);
10363
10364                 /*
10365                  * Neither inlined nor EXTENT_DATA_REF found, try
10366                  * SHARED_DATA_REF as last chance.
10367                  */
10368                 dbref_key.objectid = disk_bytenr;
10369                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10370                 dbref_key.offset = eb->start;
10371
10372                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10373                                         &dbref_key, &path, 0, 0);
10374                 if (!ret) {
10375                         found_dbackref = 1;
10376                         goto out;
10377                 }
10378         }
10379
10380 out:
10381         if (!found_dbackref)
10382                 err |= BACKREF_MISSING;
10383         btrfs_release_path(&path);
10384         if (err & BACKREF_MISSING) {
10385                 error("data extent[%llu %llu] backref lost",
10386                       disk_bytenr, disk_num_bytes);
10387         }
10388         return err;
10389 }
10390
10391 /*
10392  * Get real tree block level for the case like shared block
10393  * Return >= 0 as tree level
10394  * Return <0 for error
10395  */
10396 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10397 {
10398         struct extent_buffer *eb;
10399         struct btrfs_path path;
10400         struct btrfs_key key;
10401         struct btrfs_extent_item *ei;
10402         u64 flags;
10403         u64 transid;
10404         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10405         u8 backref_level;
10406         u8 header_level;
10407         int ret;
10408
10409         /* Search extent tree for extent generation and level */
10410         key.objectid = bytenr;
10411         key.type = BTRFS_METADATA_ITEM_KEY;
10412         key.offset = (u64)-1;
10413
10414         btrfs_init_path(&path);
10415         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10416         if (ret < 0)
10417                 goto release_out;
10418         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10419         if (ret < 0)
10420                 goto release_out;
10421         if (ret > 0) {
10422                 ret = -ENOENT;
10423                 goto release_out;
10424         }
10425
10426         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10427         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10428                             struct btrfs_extent_item);
10429         flags = btrfs_extent_flags(path.nodes[0], ei);
10430         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10431                 ret = -ENOENT;
10432                 goto release_out;
10433         }
10434
10435         /* Get transid for later read_tree_block() check */
10436         transid = btrfs_extent_generation(path.nodes[0], ei);
10437
10438         /* Get backref level as one source */
10439         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10440                 backref_level = key.offset;
10441         } else {
10442                 struct btrfs_tree_block_info *info;
10443
10444                 info = (struct btrfs_tree_block_info *)(ei + 1);
10445                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10446         }
10447         btrfs_release_path(&path);
10448
10449         /* Get level from tree block as an alternative source */
10450         eb = read_tree_block(fs_info, bytenr, nodesize, transid);
10451         if (!extent_buffer_uptodate(eb)) {
10452                 free_extent_buffer(eb);
10453                 return -EIO;
10454         }
10455         header_level = btrfs_header_level(eb);
10456         free_extent_buffer(eb);
10457
10458         if (header_level != backref_level)
10459                 return -EIO;
10460         return header_level;
10461
10462 release_out:
10463         btrfs_release_path(&path);
10464         return ret;
10465 }
10466
10467 /*
10468  * Check if a tree block backref is valid (points to a valid tree block)
10469  * if level == -1, level will be resolved
10470  * Return >0 for any error found and print error message
10471  */
10472 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10473                                     u64 bytenr, int level)
10474 {
10475         struct btrfs_root *root;
10476         struct btrfs_key key;
10477         struct btrfs_path path;
10478         struct extent_buffer *eb;
10479         struct extent_buffer *node;
10480         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10481         int err = 0;
10482         int ret;
10483
10484         /* Query level for level == -1 special case */
10485         if (level == -1)
10486                 level = query_tree_block_level(fs_info, bytenr);
10487         if (level < 0) {
10488                 err |= REFERENCER_MISSING;
10489                 goto out;
10490         }
10491
10492         key.objectid = root_id;
10493         key.type = BTRFS_ROOT_ITEM_KEY;
10494         key.offset = (u64)-1;
10495
10496         root = btrfs_read_fs_root(fs_info, &key);
10497         if (IS_ERR(root)) {
10498                 err |= REFERENCER_MISSING;
10499                 goto out;
10500         }
10501
10502         /* Read out the tree block to get item/node key */
10503         eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10504         if (!extent_buffer_uptodate(eb)) {
10505                 err |= REFERENCER_MISSING;
10506                 free_extent_buffer(eb);
10507                 goto out;
10508         }
10509
10510         /* Empty tree, no need to check key */
10511         if (!btrfs_header_nritems(eb) && !level) {
10512                 free_extent_buffer(eb);
10513                 goto out;
10514         }
10515
10516         if (level)
10517                 btrfs_node_key_to_cpu(eb, &key, 0);
10518         else
10519                 btrfs_item_key_to_cpu(eb, &key, 0);
10520
10521         free_extent_buffer(eb);
10522
10523         btrfs_init_path(&path);
10524         path.lowest_level = level;
10525         /* Search with the first key, to ensure we can reach it */
10526         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10527         if (ret < 0) {
10528                 err |= REFERENCER_MISSING;
10529                 goto release_out;
10530         }
10531
10532         node = path.nodes[level];
10533         if (btrfs_header_bytenr(node) != bytenr) {
10534                 error(
10535         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10536                         bytenr, nodesize, bytenr,
10537                         btrfs_header_bytenr(node));
10538                 err |= REFERENCER_MISMATCH;
10539         }
10540         if (btrfs_header_level(node) != level) {
10541                 error(
10542         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10543                         bytenr, nodesize, level,
10544                         btrfs_header_level(node));
10545                 err |= REFERENCER_MISMATCH;
10546         }
10547
10548 release_out:
10549         btrfs_release_path(&path);
10550 out:
10551         if (err & REFERENCER_MISSING) {
10552                 if (level < 0)
10553                         error("extent [%llu %d] lost referencer (owner: %llu)",
10554                                 bytenr, nodesize, root_id);
10555                 else
10556                         error(
10557                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10558                                 bytenr, nodesize, root_id, level);
10559         }
10560
10561         return err;
10562 }
10563
10564 /*
10565  * Check if tree block @eb is tree reloc root.
10566  * Return 0 if it's not or any problem happens
10567  * Return 1 if it's a tree reloc root
10568  */
10569 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10570                                  struct extent_buffer *eb)
10571 {
10572         struct btrfs_root *tree_reloc_root;
10573         struct btrfs_key key;
10574         u64 bytenr = btrfs_header_bytenr(eb);
10575         u64 owner = btrfs_header_owner(eb);
10576         int ret = 0;
10577
10578         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10579         key.offset = owner;
10580         key.type = BTRFS_ROOT_ITEM_KEY;
10581
10582         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10583         if (IS_ERR(tree_reloc_root))
10584                 return 0;
10585
10586         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10587                 ret = 1;
10588         btrfs_free_fs_root(tree_reloc_root);
10589         return ret;
10590 }
10591
10592 /*
10593  * Check referencer for shared block backref
10594  * If level == -1, this function will resolve the level.
10595  */
10596 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10597                                      u64 parent, u64 bytenr, int level)
10598 {
10599         struct extent_buffer *eb;
10600         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10601         u32 nr;
10602         int found_parent = 0;
10603         int i;
10604
10605         eb = read_tree_block(fs_info, parent, nodesize, 0);
10606         if (!extent_buffer_uptodate(eb))
10607                 goto out;
10608
10609         if (level == -1)
10610                 level = query_tree_block_level(fs_info, bytenr);
10611         if (level < 0)
10612                 goto out;
10613
10614         /* It's possible it's a tree reloc root */
10615         if (parent == bytenr) {
10616                 if (is_tree_reloc_root(fs_info, eb))
10617                         found_parent = 1;
10618                 goto out;
10619         }
10620
10621         if (level + 1 != btrfs_header_level(eb))
10622                 goto out;
10623
10624         nr = btrfs_header_nritems(eb);
10625         for (i = 0; i < nr; i++) {
10626                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10627                         found_parent = 1;
10628                         break;
10629                 }
10630         }
10631 out:
10632         free_extent_buffer(eb);
10633         if (!found_parent) {
10634                 error(
10635         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10636                         bytenr, nodesize, parent, level);
10637                 return REFERENCER_MISSING;
10638         }
10639         return 0;
10640 }
10641
10642 /*
10643  * Check referencer for normal (inlined) data ref
10644  * If len == 0, it will be resolved by searching in extent tree
10645  */
10646 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10647                                      u64 root_id, u64 objectid, u64 offset,
10648                                      u64 bytenr, u64 len, u32 count)
10649 {
10650         struct btrfs_root *root;
10651         struct btrfs_root *extent_root = fs_info->extent_root;
10652         struct btrfs_key key;
10653         struct btrfs_path path;
10654         struct extent_buffer *leaf;
10655         struct btrfs_file_extent_item *fi;
10656         u32 found_count = 0;
10657         int slot;
10658         int ret = 0;
10659
10660         if (!len) {
10661                 key.objectid = bytenr;
10662                 key.type = BTRFS_EXTENT_ITEM_KEY;
10663                 key.offset = (u64)-1;
10664
10665                 btrfs_init_path(&path);
10666                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10667                 if (ret < 0)
10668                         goto out;
10669                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10670                 if (ret)
10671                         goto out;
10672                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10673                 if (key.objectid != bytenr ||
10674                     key.type != BTRFS_EXTENT_ITEM_KEY)
10675                         goto out;
10676                 len = key.offset;
10677                 btrfs_release_path(&path);
10678         }
10679         key.objectid = root_id;
10680         key.type = BTRFS_ROOT_ITEM_KEY;
10681         key.offset = (u64)-1;
10682         btrfs_init_path(&path);
10683
10684         root = btrfs_read_fs_root(fs_info, &key);
10685         if (IS_ERR(root))
10686                 goto out;
10687
10688         key.objectid = objectid;
10689         key.type = BTRFS_EXTENT_DATA_KEY;
10690         /*
10691          * It can be nasty as data backref offset is
10692          * file offset - file extent offset, which is smaller or
10693          * equal to original backref offset.  The only special case is
10694          * overflow.  So we need to special check and do further search.
10695          */
10696         key.offset = offset & (1ULL << 63) ? 0 : offset;
10697
10698         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10699         if (ret < 0)
10700                 goto out;
10701
10702         /*
10703          * Search afterwards to get correct one
10704          * NOTE: As we must do a comprehensive check on the data backref to
10705          * make sure the dref count also matches, we must iterate all file
10706          * extents for that inode.
10707          */
10708         while (1) {
10709                 leaf = path.nodes[0];
10710                 slot = path.slots[0];
10711
10712                 if (slot >= btrfs_header_nritems(leaf))
10713                         goto next;
10714                 btrfs_item_key_to_cpu(leaf, &key, slot);
10715                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10716                         break;
10717                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10718                 /*
10719                  * Except normal disk bytenr and disk num bytes, we still
10720                  * need to do extra check on dbackref offset as
10721                  * dbackref offset = file_offset - file_extent_offset
10722                  */
10723                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10724                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10725                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10726                     offset)
10727                         found_count++;
10728
10729 next:
10730                 ret = btrfs_next_item(root, &path);
10731                 if (ret)
10732                         break;
10733         }
10734 out:
10735         btrfs_release_path(&path);
10736         if (found_count != count) {
10737                 error(
10738 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10739                         bytenr, len, root_id, objectid, offset, count, found_count);
10740                 return REFERENCER_MISSING;
10741         }
10742         return 0;
10743 }
10744
10745 /*
10746  * Check if the referencer of a shared data backref exists
10747  */
10748 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10749                                      u64 parent, u64 bytenr)
10750 {
10751         struct extent_buffer *eb;
10752         struct btrfs_key key;
10753         struct btrfs_file_extent_item *fi;
10754         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10755         u32 nr;
10756         int found_parent = 0;
10757         int i;
10758
10759         eb = read_tree_block(fs_info, parent, nodesize, 0);
10760         if (!extent_buffer_uptodate(eb))
10761                 goto out;
10762
10763         nr = btrfs_header_nritems(eb);
10764         for (i = 0; i < nr; i++) {
10765                 btrfs_item_key_to_cpu(eb, &key, i);
10766                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10767                         continue;
10768
10769                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10770                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10771                         continue;
10772
10773                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10774                         found_parent = 1;
10775                         break;
10776                 }
10777         }
10778
10779 out:
10780         free_extent_buffer(eb);
10781         if (!found_parent) {
10782                 error("shared extent %llu referencer lost (parent: %llu)",
10783                         bytenr, parent);
10784                 return REFERENCER_MISSING;
10785         }
10786         return 0;
10787 }
10788
10789 /*
10790  * This function will check a given extent item, including its backref and
10791  * itself (like crossing stripe boundary and type)
10792  *
10793  * Since we don't use extent_record anymore, introduce new error bit
10794  */
10795 static int check_extent_item(struct btrfs_fs_info *fs_info,
10796                              struct extent_buffer *eb, int slot)
10797 {
10798         struct btrfs_extent_item *ei;
10799         struct btrfs_extent_inline_ref *iref;
10800         struct btrfs_extent_data_ref *dref;
10801         unsigned long end;
10802         unsigned long ptr;
10803         int type;
10804         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10805         u32 item_size = btrfs_item_size_nr(eb, slot);
10806         u64 flags;
10807         u64 offset;
10808         int metadata = 0;
10809         int level;
10810         struct btrfs_key key;
10811         int ret;
10812         int err = 0;
10813
10814         btrfs_item_key_to_cpu(eb, &key, slot);
10815         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10816                 bytes_used += key.offset;
10817         else
10818                 bytes_used += nodesize;
10819
10820         if (item_size < sizeof(*ei)) {
10821                 /*
10822                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10823                  * old thing when on disk format is still un-determined.
10824                  * No need to care about it anymore
10825                  */
10826                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10827                 return -ENOTTY;
10828         }
10829
10830         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10831         flags = btrfs_extent_flags(eb, ei);
10832
10833         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10834                 metadata = 1;
10835         if (metadata && check_crossing_stripes(global_info, key.objectid,
10836                                                eb->len)) {
10837                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10838                       key.objectid, key.objectid + nodesize);
10839                 err |= CROSSING_STRIPE_BOUNDARY;
10840         }
10841
10842         ptr = (unsigned long)(ei + 1);
10843
10844         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10845                 /* Old EXTENT_ITEM metadata */
10846                 struct btrfs_tree_block_info *info;
10847
10848                 info = (struct btrfs_tree_block_info *)ptr;
10849                 level = btrfs_tree_block_level(eb, info);
10850                 ptr += sizeof(struct btrfs_tree_block_info);
10851         } else {
10852                 /* New METADATA_ITEM */
10853                 level = key.offset;
10854         }
10855         end = (unsigned long)ei + item_size;
10856
10857 next:
10858         /* Reached extent item end normally */
10859         if (ptr == end)
10860                 goto out;
10861
10862         /* Beyond extent item end, wrong item size */
10863         if (ptr > end) {
10864                 err |= ITEM_SIZE_MISMATCH;
10865                 error("extent item at bytenr %llu slot %d has wrong size",
10866                         eb->start, slot);
10867                 goto out;
10868         }
10869
10870         /* Now check every backref in this extent item */
10871         iref = (struct btrfs_extent_inline_ref *)ptr;
10872         type = btrfs_extent_inline_ref_type(eb, iref);
10873         offset = btrfs_extent_inline_ref_offset(eb, iref);
10874         switch (type) {
10875         case BTRFS_TREE_BLOCK_REF_KEY:
10876                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10877                                                level);
10878                 err |= ret;
10879                 break;
10880         case BTRFS_SHARED_BLOCK_REF_KEY:
10881                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10882                                                  level);
10883                 err |= ret;
10884                 break;
10885         case BTRFS_EXTENT_DATA_REF_KEY:
10886                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10887                 ret = check_extent_data_backref(fs_info,
10888                                 btrfs_extent_data_ref_root(eb, dref),
10889                                 btrfs_extent_data_ref_objectid(eb, dref),
10890                                 btrfs_extent_data_ref_offset(eb, dref),
10891                                 key.objectid, key.offset,
10892                                 btrfs_extent_data_ref_count(eb, dref));
10893                 err |= ret;
10894                 break;
10895         case BTRFS_SHARED_DATA_REF_KEY:
10896                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10897                 err |= ret;
10898                 break;
10899         default:
10900                 error("extent[%llu %d %llu] has unknown ref type: %d",
10901                         key.objectid, key.type, key.offset, type);
10902                 err |= UNKNOWN_TYPE;
10903                 goto out;
10904         }
10905
10906         ptr += btrfs_extent_inline_ref_size(type);
10907         goto next;
10908
10909 out:
10910         return err;
10911 }
10912
10913 /*
10914  * Check if a dev extent item is referred correctly by its chunk
10915  */
10916 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10917                                  struct extent_buffer *eb, int slot)
10918 {
10919         struct btrfs_root *chunk_root = fs_info->chunk_root;
10920         struct btrfs_dev_extent *ptr;
10921         struct btrfs_path path;
10922         struct btrfs_key chunk_key;
10923         struct btrfs_key devext_key;
10924         struct btrfs_chunk *chunk;
10925         struct extent_buffer *l;
10926         int num_stripes;
10927         u64 length;
10928         int i;
10929         int found_chunk = 0;
10930         int ret;
10931
10932         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10933         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10934         length = btrfs_dev_extent_length(eb, ptr);
10935
10936         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10937         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10938         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10939
10940         btrfs_init_path(&path);
10941         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10942         if (ret)
10943                 goto out;
10944
10945         l = path.nodes[0];
10946         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10947         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10948                                       chunk_key.offset);
10949         if (ret < 0)
10950                 goto out;
10951
10952         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10953                 goto out;
10954
10955         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10956         for (i = 0; i < num_stripes; i++) {
10957                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10958                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10959
10960                 if (devid == devext_key.objectid &&
10961                     offset == devext_key.offset) {
10962                         found_chunk = 1;
10963                         break;
10964                 }
10965         }
10966 out:
10967         btrfs_release_path(&path);
10968         if (!found_chunk) {
10969                 error(
10970                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10971                         devext_key.objectid, devext_key.offset, length);
10972                 return REFERENCER_MISSING;
10973         }
10974         return 0;
10975 }
10976
10977 /*
10978  * Check if the used space is correct with the dev item
10979  */
10980 static int check_dev_item(struct btrfs_fs_info *fs_info,
10981                           struct extent_buffer *eb, int slot)
10982 {
10983         struct btrfs_root *dev_root = fs_info->dev_root;
10984         struct btrfs_dev_item *dev_item;
10985         struct btrfs_path path;
10986         struct btrfs_key key;
10987         struct btrfs_dev_extent *ptr;
10988         u64 dev_id;
10989         u64 used;
10990         u64 total = 0;
10991         int ret;
10992
10993         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10994         dev_id = btrfs_device_id(eb, dev_item);
10995         used = btrfs_device_bytes_used(eb, dev_item);
10996
10997         key.objectid = dev_id;
10998         key.type = BTRFS_DEV_EXTENT_KEY;
10999         key.offset = 0;
11000
11001         btrfs_init_path(&path);
11002         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11003         if (ret < 0) {
11004                 btrfs_item_key_to_cpu(eb, &key, slot);
11005                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11006                         key.objectid, key.type, key.offset);
11007                 btrfs_release_path(&path);
11008                 return REFERENCER_MISSING;
11009         }
11010
11011         /* Iterate dev_extents to calculate the used space of a device */
11012         while (1) {
11013                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11014                         goto next;
11015
11016                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11017                 if (key.objectid > dev_id)
11018                         break;
11019                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11020                         goto next;
11021
11022                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11023                                      struct btrfs_dev_extent);
11024                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11025 next:
11026                 ret = btrfs_next_item(dev_root, &path);
11027                 if (ret)
11028                         break;
11029         }
11030         btrfs_release_path(&path);
11031
11032         if (used != total) {
11033                 btrfs_item_key_to_cpu(eb, &key, slot);
11034                 error(
11035 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11036                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11037                         BTRFS_DEV_EXTENT_KEY, dev_id);
11038                 return ACCOUNTING_MISMATCH;
11039         }
11040         return 0;
11041 }
11042
11043 /*
11044  * Check a block group item with its referener (chunk) and its used space
11045  * with extent/metadata item
11046  */
11047 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11048                                   struct extent_buffer *eb, int slot)
11049 {
11050         struct btrfs_root *extent_root = fs_info->extent_root;
11051         struct btrfs_root *chunk_root = fs_info->chunk_root;
11052         struct btrfs_block_group_item *bi;
11053         struct btrfs_block_group_item bg_item;
11054         struct btrfs_path path;
11055         struct btrfs_key bg_key;
11056         struct btrfs_key chunk_key;
11057         struct btrfs_key extent_key;
11058         struct btrfs_chunk *chunk;
11059         struct extent_buffer *leaf;
11060         struct btrfs_extent_item *ei;
11061         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11062         u64 flags;
11063         u64 bg_flags;
11064         u64 used;
11065         u64 total = 0;
11066         int ret;
11067         int err = 0;
11068
11069         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11070         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11071         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11072         used = btrfs_block_group_used(&bg_item);
11073         bg_flags = btrfs_block_group_flags(&bg_item);
11074
11075         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11076         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11077         chunk_key.offset = bg_key.objectid;
11078
11079         btrfs_init_path(&path);
11080         /* Search for the referencer chunk */
11081         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11082         if (ret) {
11083                 error(
11084                 "block group[%llu %llu] did not find the related chunk item",
11085                         bg_key.objectid, bg_key.offset);
11086                 err |= REFERENCER_MISSING;
11087         } else {
11088                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11089                                         struct btrfs_chunk);
11090                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11091                                                 bg_key.offset) {
11092                         error(
11093         "block group[%llu %llu] related chunk item length does not match",
11094                                 bg_key.objectid, bg_key.offset);
11095                         err |= REFERENCER_MISMATCH;
11096                 }
11097         }
11098         btrfs_release_path(&path);
11099
11100         /* Search from the block group bytenr */
11101         extent_key.objectid = bg_key.objectid;
11102         extent_key.type = 0;
11103         extent_key.offset = 0;
11104
11105         btrfs_init_path(&path);
11106         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11107         if (ret < 0)
11108                 goto out;
11109
11110         /* Iterate extent tree to account used space */
11111         while (1) {
11112                 leaf = path.nodes[0];
11113
11114                 /* Search slot can point to the last item beyond leaf nritems */
11115                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11116                         goto next;
11117
11118                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11119                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11120                         break;
11121
11122                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11123                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11124                         goto next;
11125                 if (extent_key.objectid < bg_key.objectid)
11126                         goto next;
11127
11128                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11129                         total += nodesize;
11130                 else
11131                         total += extent_key.offset;
11132
11133                 ei = btrfs_item_ptr(leaf, path.slots[0],
11134                                     struct btrfs_extent_item);
11135                 flags = btrfs_extent_flags(leaf, ei);
11136                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11137                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11138                                 error(
11139                         "bad extent[%llu, %llu) type mismatch with chunk",
11140                                         extent_key.objectid,
11141                                         extent_key.objectid + extent_key.offset);
11142                                 err |= CHUNK_TYPE_MISMATCH;
11143                         }
11144                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11145                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11146                                     BTRFS_BLOCK_GROUP_METADATA))) {
11147                                 error(
11148                         "bad extent[%llu, %llu) type mismatch with chunk",
11149                                         extent_key.objectid,
11150                                         extent_key.objectid + nodesize);
11151                                 err |= CHUNK_TYPE_MISMATCH;
11152                         }
11153                 }
11154 next:
11155                 ret = btrfs_next_item(extent_root, &path);
11156                 if (ret)
11157                         break;
11158         }
11159
11160 out:
11161         btrfs_release_path(&path);
11162
11163         if (total != used) {
11164                 error(
11165                 "block group[%llu %llu] used %llu but extent items used %llu",
11166                         bg_key.objectid, bg_key.offset, used, total);
11167                 err |= ACCOUNTING_MISMATCH;
11168         }
11169         return err;
11170 }
11171
11172 /*
11173  * Check a chunk item.
11174  * Including checking all referred dev_extents and block group
11175  */
11176 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11177                             struct extent_buffer *eb, int slot)
11178 {
11179         struct btrfs_root *extent_root = fs_info->extent_root;
11180         struct btrfs_root *dev_root = fs_info->dev_root;
11181         struct btrfs_path path;
11182         struct btrfs_key chunk_key;
11183         struct btrfs_key bg_key;
11184         struct btrfs_key devext_key;
11185         struct btrfs_chunk *chunk;
11186         struct extent_buffer *leaf;
11187         struct btrfs_block_group_item *bi;
11188         struct btrfs_block_group_item bg_item;
11189         struct btrfs_dev_extent *ptr;
11190         u64 length;
11191         u64 chunk_end;
11192         u64 stripe_len;
11193         u64 type;
11194         int num_stripes;
11195         u64 offset;
11196         u64 objectid;
11197         int i;
11198         int ret;
11199         int err = 0;
11200
11201         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11202         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11203         length = btrfs_chunk_length(eb, chunk);
11204         chunk_end = chunk_key.offset + length;
11205         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11206                                       chunk_key.offset);
11207         if (ret < 0) {
11208                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11209                         chunk_end);
11210                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11211                 goto out;
11212         }
11213         type = btrfs_chunk_type(eb, chunk);
11214
11215         bg_key.objectid = chunk_key.offset;
11216         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11217         bg_key.offset = length;
11218
11219         btrfs_init_path(&path);
11220         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11221         if (ret) {
11222                 error(
11223                 "chunk[%llu %llu) did not find the related block group item",
11224                         chunk_key.offset, chunk_end);
11225                 err |= REFERENCER_MISSING;
11226         } else{
11227                 leaf = path.nodes[0];
11228                 bi = btrfs_item_ptr(leaf, path.slots[0],
11229                                     struct btrfs_block_group_item);
11230                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11231                                    sizeof(bg_item));
11232                 if (btrfs_block_group_flags(&bg_item) != type) {
11233                         error(
11234 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11235                                 chunk_key.offset, chunk_end, type,
11236                                 btrfs_block_group_flags(&bg_item));
11237                         err |= REFERENCER_MISSING;
11238                 }
11239         }
11240
11241         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11242         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11243         for (i = 0; i < num_stripes; i++) {
11244                 btrfs_release_path(&path);
11245                 btrfs_init_path(&path);
11246                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11247                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11248                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11249
11250                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11251                                         0, 0);
11252                 if (ret)
11253                         goto not_match_dev;
11254
11255                 leaf = path.nodes[0];
11256                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11257                                      struct btrfs_dev_extent);
11258                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11259                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11260                 if (objectid != chunk_key.objectid ||
11261                     offset != chunk_key.offset ||
11262                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11263                         goto not_match_dev;
11264                 continue;
11265 not_match_dev:
11266                 err |= BACKREF_MISSING;
11267                 error(
11268                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11269                         chunk_key.objectid, chunk_end, i);
11270                 continue;
11271         }
11272         btrfs_release_path(&path);
11273 out:
11274         return err;
11275 }
11276
11277 /*
11278  * Main entry function to check known items and update related accounting info
11279  */
11280 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11281 {
11282         struct btrfs_fs_info *fs_info = root->fs_info;
11283         struct btrfs_key key;
11284         int slot = 0;
11285         int type;
11286         struct btrfs_extent_data_ref *dref;
11287         int ret;
11288         int err = 0;
11289
11290 next:
11291         btrfs_item_key_to_cpu(eb, &key, slot);
11292         type = key.type;
11293
11294         switch (type) {
11295         case BTRFS_EXTENT_DATA_KEY:
11296                 ret = check_extent_data_item(root, eb, slot);
11297                 err |= ret;
11298                 break;
11299         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11300                 ret = check_block_group_item(fs_info, eb, slot);
11301                 err |= ret;
11302                 break;
11303         case BTRFS_DEV_ITEM_KEY:
11304                 ret = check_dev_item(fs_info, eb, slot);
11305                 err |= ret;
11306                 break;
11307         case BTRFS_CHUNK_ITEM_KEY:
11308                 ret = check_chunk_item(fs_info, eb, slot);
11309                 err |= ret;
11310                 break;
11311         case BTRFS_DEV_EXTENT_KEY:
11312                 ret = check_dev_extent_item(fs_info, eb, slot);
11313                 err |= ret;
11314                 break;
11315         case BTRFS_EXTENT_ITEM_KEY:
11316         case BTRFS_METADATA_ITEM_KEY:
11317                 ret = check_extent_item(fs_info, eb, slot);
11318                 err |= ret;
11319                 break;
11320         case BTRFS_EXTENT_CSUM_KEY:
11321                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11322                 break;
11323         case BTRFS_TREE_BLOCK_REF_KEY:
11324                 ret = check_tree_block_backref(fs_info, key.offset,
11325                                                key.objectid, -1);
11326                 err |= ret;
11327                 break;
11328         case BTRFS_EXTENT_DATA_REF_KEY:
11329                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11330                 ret = check_extent_data_backref(fs_info,
11331                                 btrfs_extent_data_ref_root(eb, dref),
11332                                 btrfs_extent_data_ref_objectid(eb, dref),
11333                                 btrfs_extent_data_ref_offset(eb, dref),
11334                                 key.objectid, 0,
11335                                 btrfs_extent_data_ref_count(eb, dref));
11336                 err |= ret;
11337                 break;
11338         case BTRFS_SHARED_BLOCK_REF_KEY:
11339                 ret = check_shared_block_backref(fs_info, key.offset,
11340                                                  key.objectid, -1);
11341                 err |= ret;
11342                 break;
11343         case BTRFS_SHARED_DATA_REF_KEY:
11344                 ret = check_shared_data_backref(fs_info, key.offset,
11345                                                 key.objectid);
11346                 err |= ret;
11347                 break;
11348         default:
11349                 break;
11350         }
11351
11352         if (++slot < btrfs_header_nritems(eb))
11353                 goto next;
11354
11355         return err;
11356 }
11357
11358 /*
11359  * Helper function for later fs/subvol tree check.  To determine if a tree
11360  * block should be checked.
11361  * This function will ensure only the direct referencer with lowest rootid to
11362  * check a fs/subvolume tree block.
11363  *
11364  * Backref check at extent tree would detect errors like missing subvolume
11365  * tree, so we can do aggressive check to reduce duplicated checks.
11366  */
11367 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11368 {
11369         struct btrfs_root *extent_root = root->fs_info->extent_root;
11370         struct btrfs_key key;
11371         struct btrfs_path path;
11372         struct extent_buffer *leaf;
11373         int slot;
11374         struct btrfs_extent_item *ei;
11375         unsigned long ptr;
11376         unsigned long end;
11377         int type;
11378         u32 item_size;
11379         u64 offset;
11380         struct btrfs_extent_inline_ref *iref;
11381         int ret;
11382
11383         btrfs_init_path(&path);
11384         key.objectid = btrfs_header_bytenr(eb);
11385         key.type = BTRFS_METADATA_ITEM_KEY;
11386         key.offset = (u64)-1;
11387
11388         /*
11389          * Any failure in backref resolving means we can't determine
11390          * whom the tree block belongs to.
11391          * So in that case, we need to check that tree block
11392          */
11393         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11394         if (ret < 0)
11395                 goto need_check;
11396
11397         ret = btrfs_previous_extent_item(extent_root, &path,
11398                                          btrfs_header_bytenr(eb));
11399         if (ret)
11400                 goto need_check;
11401
11402         leaf = path.nodes[0];
11403         slot = path.slots[0];
11404         btrfs_item_key_to_cpu(leaf, &key, slot);
11405         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11406
11407         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11408                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11409         } else {
11410                 struct btrfs_tree_block_info *info;
11411
11412                 info = (struct btrfs_tree_block_info *)(ei + 1);
11413                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11414         }
11415
11416         item_size = btrfs_item_size_nr(leaf, slot);
11417         ptr = (unsigned long)iref;
11418         end = (unsigned long)ei + item_size;
11419         while (ptr < end) {
11420                 iref = (struct btrfs_extent_inline_ref *)ptr;
11421                 type = btrfs_extent_inline_ref_type(leaf, iref);
11422                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11423
11424                 /*
11425                  * We only check the tree block if current root is
11426                  * the lowest referencer of it.
11427                  */
11428                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11429                     offset < root->objectid) {
11430                         btrfs_release_path(&path);
11431                         return 0;
11432                 }
11433
11434                 ptr += btrfs_extent_inline_ref_size(type);
11435         }
11436         /*
11437          * Normally we should also check keyed tree block ref, but that may be
11438          * very time consuming.  Inlined ref should already make us skip a lot
11439          * of refs now.  So skip search keyed tree block ref.
11440          */
11441
11442 need_check:
11443         btrfs_release_path(&path);
11444         return 1;
11445 }
11446
11447 /*
11448  * Traversal function for tree block. We will do:
11449  * 1) Skip shared fs/subvolume tree blocks
11450  * 2) Update related bytes accounting
11451  * 3) Pre-order traversal
11452  */
11453 static int traverse_tree_block(struct btrfs_root *root,
11454                                 struct extent_buffer *node)
11455 {
11456         struct extent_buffer *eb;
11457         struct btrfs_key key;
11458         struct btrfs_key drop_key;
11459         int level;
11460         u64 nr;
11461         int i;
11462         int err = 0;
11463         int ret;
11464
11465         /*
11466          * Skip shared fs/subvolume tree block, in that case they will
11467          * be checked by referencer with lowest rootid
11468          */
11469         if (is_fstree(root->objectid) && !should_check(root, node))
11470                 return 0;
11471
11472         /* Update bytes accounting */
11473         total_btree_bytes += node->len;
11474         if (fs_root_objectid(btrfs_header_owner(node)))
11475                 total_fs_tree_bytes += node->len;
11476         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11477                 total_extent_tree_bytes += node->len;
11478         if (!found_old_backref &&
11479             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11480             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11481             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11482                 found_old_backref = 1;
11483
11484         /* pre-order tranversal, check itself first */
11485         level = btrfs_header_level(node);
11486         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11487                                    btrfs_header_level(node),
11488                                    btrfs_header_owner(node));
11489         err |= ret;
11490         if (err)
11491                 error(
11492         "check %s failed root %llu bytenr %llu level %d, force continue check",
11493                         level ? "node":"leaf", root->objectid,
11494                         btrfs_header_bytenr(node), btrfs_header_level(node));
11495
11496         if (!level) {
11497                 btree_space_waste += btrfs_leaf_free_space(root, node);
11498                 ret = check_leaf_items(root, node);
11499                 err |= ret;
11500                 return err;
11501         }
11502
11503         nr = btrfs_header_nritems(node);
11504         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11505         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11506                 sizeof(struct btrfs_key_ptr);
11507
11508         /* Then check all its children */
11509         for (i = 0; i < nr; i++) {
11510                 u64 blocknr = btrfs_node_blockptr(node, i);
11511
11512                 btrfs_node_key_to_cpu(node, &key, i);
11513                 if (level == root->root_item.drop_level &&
11514                     is_dropped_key(&key, &drop_key))
11515                         continue;
11516
11517                 /*
11518                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11519                  * to call the function itself.
11520                  */
11521                 eb = read_tree_block(root->fs_info, blocknr,
11522                                 root->fs_info->nodesize, 0);
11523                 if (extent_buffer_uptodate(eb)) {
11524                         ret = traverse_tree_block(root, eb);
11525                         err |= ret;
11526                 }
11527                 free_extent_buffer(eb);
11528         }
11529
11530         return err;
11531 }
11532
11533 /*
11534  * Low memory usage version check_chunks_and_extents.
11535  */
11536 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11537 {
11538         struct btrfs_path path;
11539         struct btrfs_key key;
11540         struct btrfs_root *root1;
11541         struct btrfs_root *cur_root;
11542         int err = 0;
11543         int ret;
11544
11545         root1 = root->fs_info->chunk_root;
11546         ret = traverse_tree_block(root1, root1->node);
11547         err |= ret;
11548
11549         root1 = root->fs_info->tree_root;
11550         ret = traverse_tree_block(root1, root1->node);
11551         err |= ret;
11552
11553         btrfs_init_path(&path);
11554         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11555         key.offset = 0;
11556         key.type = BTRFS_ROOT_ITEM_KEY;
11557
11558         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11559         if (ret) {
11560                 error("cannot find extent treet in tree_root");
11561                 goto out;
11562         }
11563
11564         while (1) {
11565                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11566                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11567                         goto next;
11568                 key.offset = (u64)-1;
11569
11570                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11571                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11572                                         &key);
11573                 else
11574                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11575                 if (IS_ERR(cur_root) || !cur_root) {
11576                         error("failed to read tree: %lld", key.objectid);
11577                         goto next;
11578                 }
11579
11580                 ret = traverse_tree_block(cur_root, cur_root->node);
11581                 err |= ret;
11582
11583                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11584                         btrfs_free_fs_root(cur_root);
11585 next:
11586                 ret = btrfs_next_item(root1, &path);
11587                 if (ret)
11588                         goto out;
11589         }
11590
11591 out:
11592         btrfs_release_path(&path);
11593         return err;
11594 }
11595
11596 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11597                            struct btrfs_root *root, int overwrite)
11598 {
11599         struct extent_buffer *c;
11600         struct extent_buffer *old = root->node;
11601         int level;
11602         int ret;
11603         struct btrfs_disk_key disk_key = {0,0,0};
11604
11605         level = 0;
11606
11607         if (overwrite) {
11608                 c = old;
11609                 extent_buffer_get(c);
11610                 goto init;
11611         }
11612         c = btrfs_alloc_free_block(trans, root,
11613                                    root->fs_info->nodesize,
11614                                    root->root_key.objectid,
11615                                    &disk_key, level, 0, 0);
11616         if (IS_ERR(c)) {
11617                 c = old;
11618                 extent_buffer_get(c);
11619                 overwrite = 1;
11620         }
11621 init:
11622         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11623         btrfs_set_header_level(c, level);
11624         btrfs_set_header_bytenr(c, c->start);
11625         btrfs_set_header_generation(c, trans->transid);
11626         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11627         btrfs_set_header_owner(c, root->root_key.objectid);
11628
11629         write_extent_buffer(c, root->fs_info->fsid,
11630                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11631
11632         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11633                             btrfs_header_chunk_tree_uuid(c),
11634                             BTRFS_UUID_SIZE);
11635
11636         btrfs_mark_buffer_dirty(c);
11637         /*
11638          * this case can happen in the following case:
11639          *
11640          * 1.overwrite previous root.
11641          *
11642          * 2.reinit reloc data root, this is because we skip pin
11643          * down reloc data tree before which means we can allocate
11644          * same block bytenr here.
11645          */
11646         if (old->start == c->start) {
11647                 btrfs_set_root_generation(&root->root_item,
11648                                           trans->transid);
11649                 root->root_item.level = btrfs_header_level(root->node);
11650                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11651                                         &root->root_key, &root->root_item);
11652                 if (ret) {
11653                         free_extent_buffer(c);
11654                         return ret;
11655                 }
11656         }
11657         free_extent_buffer(old);
11658         root->node = c;
11659         add_root_to_dirty_list(root);
11660         return 0;
11661 }
11662
11663 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11664                                 struct extent_buffer *eb, int tree_root)
11665 {
11666         struct extent_buffer *tmp;
11667         struct btrfs_root_item *ri;
11668         struct btrfs_key key;
11669         u64 bytenr;
11670         u32 nodesize;
11671         int level = btrfs_header_level(eb);
11672         int nritems;
11673         int ret;
11674         int i;
11675
11676         /*
11677          * If we have pinned this block before, don't pin it again.
11678          * This can not only avoid forever loop with broken filesystem
11679          * but also give us some speedups.
11680          */
11681         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11682                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11683                 return 0;
11684
11685         btrfs_pin_extent(fs_info, eb->start, eb->len);
11686
11687         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11688         nritems = btrfs_header_nritems(eb);
11689         for (i = 0; i < nritems; i++) {
11690                 if (level == 0) {
11691                         btrfs_item_key_to_cpu(eb, &key, i);
11692                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11693                                 continue;
11694                         /* Skip the extent root and reloc roots */
11695                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11696                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11697                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11698                                 continue;
11699                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11700                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11701
11702                         /*
11703                          * If at any point we start needing the real root we
11704                          * will have to build a stump root for the root we are
11705                          * in, but for now this doesn't actually use the root so
11706                          * just pass in extent_root.
11707                          */
11708                         tmp = read_tree_block(fs_info, bytenr, nodesize, 0);
11709                         if (!extent_buffer_uptodate(tmp)) {
11710                                 fprintf(stderr, "Error reading root block\n");
11711                                 return -EIO;
11712                         }
11713                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11714                         free_extent_buffer(tmp);
11715                         if (ret)
11716                                 return ret;
11717                 } else {
11718                         bytenr = btrfs_node_blockptr(eb, i);
11719
11720                         /* If we aren't the tree root don't read the block */
11721                         if (level == 1 && !tree_root) {
11722                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11723                                 continue;
11724                         }
11725
11726                         tmp = read_tree_block(fs_info, bytenr,
11727                                               nodesize, 0);
11728                         if (!extent_buffer_uptodate(tmp)) {
11729                                 fprintf(stderr, "Error reading tree block\n");
11730                                 return -EIO;
11731                         }
11732                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11733                         free_extent_buffer(tmp);
11734                         if (ret)
11735                                 return ret;
11736                 }
11737         }
11738
11739         return 0;
11740 }
11741
11742 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11743 {
11744         int ret;
11745
11746         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11747         if (ret)
11748                 return ret;
11749
11750         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11751 }
11752
11753 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11754 {
11755         struct btrfs_block_group_cache *cache;
11756         struct btrfs_path path;
11757         struct extent_buffer *leaf;
11758         struct btrfs_chunk *chunk;
11759         struct btrfs_key key;
11760         int ret;
11761         u64 start;
11762
11763         btrfs_init_path(&path);
11764         key.objectid = 0;
11765         key.type = BTRFS_CHUNK_ITEM_KEY;
11766         key.offset = 0;
11767         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11768         if (ret < 0) {
11769                 btrfs_release_path(&path);
11770                 return ret;
11771         }
11772
11773         /*
11774          * We do this in case the block groups were screwed up and had alloc
11775          * bits that aren't actually set on the chunks.  This happens with
11776          * restored images every time and could happen in real life I guess.
11777          */
11778         fs_info->avail_data_alloc_bits = 0;
11779         fs_info->avail_metadata_alloc_bits = 0;
11780         fs_info->avail_system_alloc_bits = 0;
11781
11782         /* First we need to create the in-memory block groups */
11783         while (1) {
11784                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11785                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11786                         if (ret < 0) {
11787                                 btrfs_release_path(&path);
11788                                 return ret;
11789                         }
11790                         if (ret) {
11791                                 ret = 0;
11792                                 break;
11793                         }
11794                 }
11795                 leaf = path.nodes[0];
11796                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11797                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11798                         path.slots[0]++;
11799                         continue;
11800                 }
11801
11802                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11803                 btrfs_add_block_group(fs_info, 0,
11804                                       btrfs_chunk_type(leaf, chunk),
11805                                       key.objectid, key.offset,
11806                                       btrfs_chunk_length(leaf, chunk));
11807                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11808                                  key.offset + btrfs_chunk_length(leaf, chunk));
11809                 path.slots[0]++;
11810         }
11811         start = 0;
11812         while (1) {
11813                 cache = btrfs_lookup_first_block_group(fs_info, start);
11814                 if (!cache)
11815                         break;
11816                 cache->cached = 1;
11817                 start = cache->key.objectid + cache->key.offset;
11818         }
11819
11820         btrfs_release_path(&path);
11821         return 0;
11822 }
11823
11824 static int reset_balance(struct btrfs_trans_handle *trans,
11825                          struct btrfs_fs_info *fs_info)
11826 {
11827         struct btrfs_root *root = fs_info->tree_root;
11828         struct btrfs_path path;
11829         struct extent_buffer *leaf;
11830         struct btrfs_key key;
11831         int del_slot, del_nr = 0;
11832         int ret;
11833         int found = 0;
11834
11835         btrfs_init_path(&path);
11836         key.objectid = BTRFS_BALANCE_OBJECTID;
11837         key.type = BTRFS_BALANCE_ITEM_KEY;
11838         key.offset = 0;
11839         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11840         if (ret) {
11841                 if (ret > 0)
11842                         ret = 0;
11843                 if (!ret)
11844                         goto reinit_data_reloc;
11845                 else
11846                         goto out;
11847         }
11848
11849         ret = btrfs_del_item(trans, root, &path);
11850         if (ret)
11851                 goto out;
11852         btrfs_release_path(&path);
11853
11854         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11855         key.type = BTRFS_ROOT_ITEM_KEY;
11856         key.offset = 0;
11857         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11858         if (ret < 0)
11859                 goto out;
11860         while (1) {
11861                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11862                         if (!found)
11863                                 break;
11864
11865                         if (del_nr) {
11866                                 ret = btrfs_del_items(trans, root, &path,
11867                                                       del_slot, del_nr);
11868                                 del_nr = 0;
11869                                 if (ret)
11870                                         goto out;
11871                         }
11872                         key.offset++;
11873                         btrfs_release_path(&path);
11874
11875                         found = 0;
11876                         ret = btrfs_search_slot(trans, root, &key, &path,
11877                                                 -1, 1);
11878                         if (ret < 0)
11879                                 goto out;
11880                         continue;
11881                 }
11882                 found = 1;
11883                 leaf = path.nodes[0];
11884                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11885                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11886                         break;
11887                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11888                         path.slots[0]++;
11889                         continue;
11890                 }
11891                 if (!del_nr) {
11892                         del_slot = path.slots[0];
11893                         del_nr = 1;
11894                 } else {
11895                         del_nr++;
11896                 }
11897                 path.slots[0]++;
11898         }
11899
11900         if (del_nr) {
11901                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11902                 if (ret)
11903                         goto out;
11904         }
11905         btrfs_release_path(&path);
11906
11907 reinit_data_reloc:
11908         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11909         key.type = BTRFS_ROOT_ITEM_KEY;
11910         key.offset = (u64)-1;
11911         root = btrfs_read_fs_root(fs_info, &key);
11912         if (IS_ERR(root)) {
11913                 fprintf(stderr, "Error reading data reloc tree\n");
11914                 ret = PTR_ERR(root);
11915                 goto out;
11916         }
11917         record_root_in_trans(trans, root);
11918         ret = btrfs_fsck_reinit_root(trans, root, 0);
11919         if (ret)
11920                 goto out;
11921         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11922 out:
11923         btrfs_release_path(&path);
11924         return ret;
11925 }
11926
11927 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11928                               struct btrfs_fs_info *fs_info)
11929 {
11930         u64 start = 0;
11931         int ret;
11932
11933         /*
11934          * The only reason we don't do this is because right now we're just
11935          * walking the trees we find and pinning down their bytes, we don't look
11936          * at any of the leaves.  In order to do mixed groups we'd have to check
11937          * the leaves of any fs roots and pin down the bytes for any file
11938          * extents we find.  Not hard but why do it if we don't have to?
11939          */
11940         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11941                 fprintf(stderr, "We don't support re-initing the extent tree "
11942                         "for mixed block groups yet, please notify a btrfs "
11943                         "developer you want to do this so they can add this "
11944                         "functionality.\n");
11945                 return -EINVAL;
11946         }
11947
11948         /*
11949          * first we need to walk all of the trees except the extent tree and pin
11950          * down the bytes that are in use so we don't overwrite any existing
11951          * metadata.
11952          */
11953         ret = pin_metadata_blocks(fs_info);
11954         if (ret) {
11955                 fprintf(stderr, "error pinning down used bytes\n");
11956                 return ret;
11957         }
11958
11959         /*
11960          * Need to drop all the block groups since we're going to recreate all
11961          * of them again.
11962          */
11963         btrfs_free_block_groups(fs_info);
11964         ret = reset_block_groups(fs_info);
11965         if (ret) {
11966                 fprintf(stderr, "error resetting the block groups\n");
11967                 return ret;
11968         }
11969
11970         /* Ok we can allocate now, reinit the extent root */
11971         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11972         if (ret) {
11973                 fprintf(stderr, "extent root initialization failed\n");
11974                 /*
11975                  * When the transaction code is updated we should end the
11976                  * transaction, but for now progs only knows about commit so
11977                  * just return an error.
11978                  */
11979                 return ret;
11980         }
11981
11982         /*
11983          * Now we have all the in-memory block groups setup so we can make
11984          * allocations properly, and the metadata we care about is safe since we
11985          * pinned all of it above.
11986          */
11987         while (1) {
11988                 struct btrfs_block_group_cache *cache;
11989
11990                 cache = btrfs_lookup_first_block_group(fs_info, start);
11991                 if (!cache)
11992                         break;
11993                 start = cache->key.objectid + cache->key.offset;
11994                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11995                                         &cache->key, &cache->item,
11996                                         sizeof(cache->item));
11997                 if (ret) {
11998                         fprintf(stderr, "Error adding block group\n");
11999                         return ret;
12000                 }
12001                 btrfs_extent_post_op(trans, fs_info->extent_root);
12002         }
12003
12004         ret = reset_balance(trans, fs_info);
12005         if (ret)
12006                 fprintf(stderr, "error resetting the pending balance\n");
12007
12008         return ret;
12009 }
12010
12011 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12012 {
12013         struct btrfs_path path;
12014         struct btrfs_trans_handle *trans;
12015         struct btrfs_key key;
12016         int ret;
12017
12018         printf("Recowing metadata block %llu\n", eb->start);
12019         key.objectid = btrfs_header_owner(eb);
12020         key.type = BTRFS_ROOT_ITEM_KEY;
12021         key.offset = (u64)-1;
12022
12023         root = btrfs_read_fs_root(root->fs_info, &key);
12024         if (IS_ERR(root)) {
12025                 fprintf(stderr, "Couldn't find owner root %llu\n",
12026                         key.objectid);
12027                 return PTR_ERR(root);
12028         }
12029
12030         trans = btrfs_start_transaction(root, 1);
12031         if (IS_ERR(trans))
12032                 return PTR_ERR(trans);
12033
12034         btrfs_init_path(&path);
12035         path.lowest_level = btrfs_header_level(eb);
12036         if (path.lowest_level)
12037                 btrfs_node_key_to_cpu(eb, &key, 0);
12038         else
12039                 btrfs_item_key_to_cpu(eb, &key, 0);
12040
12041         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12042         btrfs_commit_transaction(trans, root);
12043         btrfs_release_path(&path);
12044         return ret;
12045 }
12046
12047 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12048 {
12049         struct btrfs_path path;
12050         struct btrfs_trans_handle *trans;
12051         struct btrfs_key key;
12052         int ret;
12053
12054         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12055                bad->key.type, bad->key.offset);
12056         key.objectid = bad->root_id;
12057         key.type = BTRFS_ROOT_ITEM_KEY;
12058         key.offset = (u64)-1;
12059
12060         root = btrfs_read_fs_root(root->fs_info, &key);
12061         if (IS_ERR(root)) {
12062                 fprintf(stderr, "Couldn't find owner root %llu\n",
12063                         key.objectid);
12064                 return PTR_ERR(root);
12065         }
12066
12067         trans = btrfs_start_transaction(root, 1);
12068         if (IS_ERR(trans))
12069                 return PTR_ERR(trans);
12070
12071         btrfs_init_path(&path);
12072         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12073         if (ret) {
12074                 if (ret > 0)
12075                         ret = 0;
12076                 goto out;
12077         }
12078         ret = btrfs_del_item(trans, root, &path);
12079 out:
12080         btrfs_commit_transaction(trans, root);
12081         btrfs_release_path(&path);
12082         return ret;
12083 }
12084
12085 static int zero_log_tree(struct btrfs_root *root)
12086 {
12087         struct btrfs_trans_handle *trans;
12088         int ret;
12089
12090         trans = btrfs_start_transaction(root, 1);
12091         if (IS_ERR(trans)) {
12092                 ret = PTR_ERR(trans);
12093                 return ret;
12094         }
12095         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12096         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12097         ret = btrfs_commit_transaction(trans, root);
12098         return ret;
12099 }
12100
12101 static int populate_csum(struct btrfs_trans_handle *trans,
12102                          struct btrfs_root *csum_root, char *buf, u64 start,
12103                          u64 len)
12104 {
12105         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12106         u64 offset = 0;
12107         u64 sectorsize;
12108         int ret = 0;
12109
12110         while (offset < len) {
12111                 sectorsize = fs_info->sectorsize;
12112                 ret = read_extent_data(fs_info, buf, start + offset,
12113                                        &sectorsize, 0);
12114                 if (ret)
12115                         break;
12116                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12117                                             start + offset, buf, sectorsize);
12118                 if (ret)
12119                         break;
12120                 offset += sectorsize;
12121         }
12122         return ret;
12123 }
12124
12125 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12126                                       struct btrfs_root *csum_root,
12127                                       struct btrfs_root *cur_root)
12128 {
12129         struct btrfs_path path;
12130         struct btrfs_key key;
12131         struct extent_buffer *node;
12132         struct btrfs_file_extent_item *fi;
12133         char *buf = NULL;
12134         u64 start = 0;
12135         u64 len = 0;
12136         int slot = 0;
12137         int ret = 0;
12138
12139         buf = malloc(cur_root->fs_info->sectorsize);
12140         if (!buf)
12141                 return -ENOMEM;
12142
12143         btrfs_init_path(&path);
12144         key.objectid = 0;
12145         key.offset = 0;
12146         key.type = 0;
12147         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12148         if (ret < 0)
12149                 goto out;
12150         /* Iterate all regular file extents and fill its csum */
12151         while (1) {
12152                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12153
12154                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12155                         goto next;
12156                 node = path.nodes[0];
12157                 slot = path.slots[0];
12158                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12159                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12160                         goto next;
12161                 start = btrfs_file_extent_disk_bytenr(node, fi);
12162                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12163
12164                 ret = populate_csum(trans, csum_root, buf, start, len);
12165                 if (ret == -EEXIST)
12166                         ret = 0;
12167                 if (ret < 0)
12168                         goto out;
12169 next:
12170                 /*
12171                  * TODO: if next leaf is corrupted, jump to nearest next valid
12172                  * leaf.
12173                  */
12174                 ret = btrfs_next_item(cur_root, &path);
12175                 if (ret < 0)
12176                         goto out;
12177                 if (ret > 0) {
12178                         ret = 0;
12179                         goto out;
12180                 }
12181         }
12182
12183 out:
12184         btrfs_release_path(&path);
12185         free(buf);
12186         return ret;
12187 }
12188
12189 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12190                                   struct btrfs_root *csum_root)
12191 {
12192         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12193         struct btrfs_path path;
12194         struct btrfs_root *tree_root = fs_info->tree_root;
12195         struct btrfs_root *cur_root;
12196         struct extent_buffer *node;
12197         struct btrfs_key key;
12198         int slot = 0;
12199         int ret = 0;
12200
12201         btrfs_init_path(&path);
12202         key.objectid = BTRFS_FS_TREE_OBJECTID;
12203         key.offset = 0;
12204         key.type = BTRFS_ROOT_ITEM_KEY;
12205         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12206         if (ret < 0)
12207                 goto out;
12208         if (ret > 0) {
12209                 ret = -ENOENT;
12210                 goto out;
12211         }
12212
12213         while (1) {
12214                 node = path.nodes[0];
12215                 slot = path.slots[0];
12216                 btrfs_item_key_to_cpu(node, &key, slot);
12217                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12218                         goto out;
12219                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12220                         goto next;
12221                 if (!is_fstree(key.objectid))
12222                         goto next;
12223                 key.offset = (u64)-1;
12224
12225                 cur_root = btrfs_read_fs_root(fs_info, &key);
12226                 if (IS_ERR(cur_root) || !cur_root) {
12227                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12228                                 key.objectid);
12229                         goto out;
12230                 }
12231                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12232                                 cur_root);
12233                 if (ret < 0)
12234                         goto out;
12235 next:
12236                 ret = btrfs_next_item(tree_root, &path);
12237                 if (ret > 0) {
12238                         ret = 0;
12239                         goto out;
12240                 }
12241                 if (ret < 0)
12242                         goto out;
12243         }
12244
12245 out:
12246         btrfs_release_path(&path);
12247         return ret;
12248 }
12249
12250 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12251                                       struct btrfs_root *csum_root)
12252 {
12253         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12254         struct btrfs_path path;
12255         struct btrfs_extent_item *ei;
12256         struct extent_buffer *leaf;
12257         char *buf;
12258         struct btrfs_key key;
12259         int ret;
12260
12261         btrfs_init_path(&path);
12262         key.objectid = 0;
12263         key.type = BTRFS_EXTENT_ITEM_KEY;
12264         key.offset = 0;
12265         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12266         if (ret < 0) {
12267                 btrfs_release_path(&path);
12268                 return ret;
12269         }
12270
12271         buf = malloc(csum_root->fs_info->sectorsize);
12272         if (!buf) {
12273                 btrfs_release_path(&path);
12274                 return -ENOMEM;
12275         }
12276
12277         while (1) {
12278                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12279                         ret = btrfs_next_leaf(extent_root, &path);
12280                         if (ret < 0)
12281                                 break;
12282                         if (ret) {
12283                                 ret = 0;
12284                                 break;
12285                         }
12286                 }
12287                 leaf = path.nodes[0];
12288
12289                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12290                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12291                         path.slots[0]++;
12292                         continue;
12293                 }
12294
12295                 ei = btrfs_item_ptr(leaf, path.slots[0],
12296                                     struct btrfs_extent_item);
12297                 if (!(btrfs_extent_flags(leaf, ei) &
12298                       BTRFS_EXTENT_FLAG_DATA)) {
12299                         path.slots[0]++;
12300                         continue;
12301                 }
12302
12303                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12304                                     key.offset);
12305                 if (ret)
12306                         break;
12307                 path.slots[0]++;
12308         }
12309
12310         btrfs_release_path(&path);
12311         free(buf);
12312         return ret;
12313 }
12314
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source and the fs/subvolume trees are
 * walked instead.  @search_fs_tree selects that mode; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12331
12332 static void free_roots_info_cache(void)
12333 {
12334         if (!roots_info_cache)
12335                 return;
12336
12337         while (!cache_tree_empty(roots_info_cache)) {
12338                 struct cache_extent *entry;
12339                 struct root_item_info *rii;
12340
12341                 entry = first_cache_extent(roots_info_cache);
12342                 if (!entry)
12343                         break;
12344                 remove_cache_extent(roots_info_cache, entry);
12345                 rii = container_of(entry, struct root_item_info, cache_extent);
12346                 free(rii);
12347         }
12348
12349         free(roots_info_cache);
12350         roots_info_cache = NULL;
12351 }
12352
12353 static int build_roots_info_cache(struct btrfs_fs_info *info)
12354 {
12355         int ret = 0;
12356         struct btrfs_key key;
12357         struct extent_buffer *leaf;
12358         struct btrfs_path path;
12359
12360         if (!roots_info_cache) {
12361                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12362                 if (!roots_info_cache)
12363                         return -ENOMEM;
12364                 cache_tree_init(roots_info_cache);
12365         }
12366
12367         btrfs_init_path(&path);
12368         key.objectid = 0;
12369         key.type = BTRFS_EXTENT_ITEM_KEY;
12370         key.offset = 0;
12371         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12372         if (ret < 0)
12373                 goto out;
12374         leaf = path.nodes[0];
12375
12376         while (1) {
12377                 struct btrfs_key found_key;
12378                 struct btrfs_extent_item *ei;
12379                 struct btrfs_extent_inline_ref *iref;
12380                 int slot = path.slots[0];
12381                 int type;
12382                 u64 flags;
12383                 u64 root_id;
12384                 u8 level;
12385                 struct cache_extent *entry;
12386                 struct root_item_info *rii;
12387
12388                 if (slot >= btrfs_header_nritems(leaf)) {
12389                         ret = btrfs_next_leaf(info->extent_root, &path);
12390                         if (ret < 0) {
12391                                 break;
12392                         } else if (ret) {
12393                                 ret = 0;
12394                                 break;
12395                         }
12396                         leaf = path.nodes[0];
12397                         slot = path.slots[0];
12398                 }
12399
12400                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12401
12402                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12403                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12404                         goto next;
12405
12406                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12407                 flags = btrfs_extent_flags(leaf, ei);
12408
12409                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12410                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12411                         goto next;
12412
12413                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12414                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12415                         level = found_key.offset;
12416                 } else {
12417                         struct btrfs_tree_block_info *binfo;
12418
12419                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12420                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12421                         level = btrfs_tree_block_level(leaf, binfo);
12422                 }
12423
12424                 /*
12425                  * For a root extent, it must be of the following type and the
12426                  * first (and only one) iref in the item.
12427                  */
12428                 type = btrfs_extent_inline_ref_type(leaf, iref);
12429                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12430                         goto next;
12431
12432                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12433                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12434                 if (!entry) {
12435                         rii = malloc(sizeof(struct root_item_info));
12436                         if (!rii) {
12437                                 ret = -ENOMEM;
12438                                 goto out;
12439                         }
12440                         rii->cache_extent.start = root_id;
12441                         rii->cache_extent.size = 1;
12442                         rii->level = (u8)-1;
12443                         entry = &rii->cache_extent;
12444                         ret = insert_cache_extent(roots_info_cache, entry);
12445                         ASSERT(ret == 0);
12446                 } else {
12447                         rii = container_of(entry, struct root_item_info,
12448                                            cache_extent);
12449                 }
12450
12451                 ASSERT(rii->cache_extent.start == root_id);
12452                 ASSERT(rii->cache_extent.size == 1);
12453
12454                 if (level > rii->level || rii->level == (u8)-1) {
12455                         rii->level = level;
12456                         rii->bytenr = found_key.objectid;
12457                         rii->gen = btrfs_extent_generation(leaf, ei);
12458                         rii->node_count = 1;
12459                 } else if (level == rii->level) {
12460                         rii->node_count++;
12461                 }
12462 next:
12463                 path.slots[0]++;
12464         }
12465
12466 out:
12467         btrfs_release_path(&path);
12468
12469         return ret;
12470 }
12471
12472 static int maybe_repair_root_item(struct btrfs_path *path,
12473                                   const struct btrfs_key *root_key,
12474                                   const int read_only_mode)
12475 {
12476         const u64 root_id = root_key->objectid;
12477         struct cache_extent *entry;
12478         struct root_item_info *rii;
12479         struct btrfs_root_item ri;
12480         unsigned long offset;
12481
12482         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12483         if (!entry) {
12484                 fprintf(stderr,
12485                         "Error: could not find extent items for root %llu\n",
12486                         root_key->objectid);
12487                 return -ENOENT;
12488         }
12489
12490         rii = container_of(entry, struct root_item_info, cache_extent);
12491         ASSERT(rii->cache_extent.start == root_id);
12492         ASSERT(rii->cache_extent.size == 1);
12493
12494         if (rii->node_count != 1) {
12495                 fprintf(stderr,
12496                         "Error: could not find btree root extent for root %llu\n",
12497                         root_id);
12498                 return -ENOENT;
12499         }
12500
12501         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12502         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12503
12504         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12505             btrfs_root_level(&ri) != rii->level ||
12506             btrfs_root_generation(&ri) != rii->gen) {
12507
12508                 /*
12509                  * If we're in repair mode but our caller told us to not update
12510                  * the root item, i.e. just check if it needs to be updated, don't
12511                  * print this message, since the caller will call us again shortly
12512                  * for the same root item without read only mode (the caller will
12513                  * open a transaction first).
12514                  */
12515                 if (!(read_only_mode && repair))
12516                         fprintf(stderr,
12517                                 "%sroot item for root %llu,"
12518                                 " current bytenr %llu, current gen %llu, current level %u,"
12519                                 " new bytenr %llu, new gen %llu, new level %u\n",
12520                                 (read_only_mode ? "" : "fixing "),
12521                                 root_id,
12522                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12523                                 btrfs_root_level(&ri),
12524                                 rii->bytenr, rii->gen, rii->level);
12525
12526                 if (btrfs_root_generation(&ri) > rii->gen) {
12527                         fprintf(stderr,
12528                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12529                                 root_id, btrfs_root_generation(&ri), rii->gen);
12530                         return -EINVAL;
12531                 }
12532
12533                 if (!read_only_mode) {
12534                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12535                         btrfs_set_root_level(&ri, rii->level);
12536                         btrfs_set_root_generation(&ri, rii->gen);
12537                         write_extent_buffer(path->nodes[0], &ri,
12538                                             offset, sizeof(ri));
12539                 }
12540
12541                 return 1;
12542         }
12543
12544         return 0;
12545 }
12546
12547 /*
12548  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12549  * caused read-only snapshots to be corrupted if they were created at a moment
12550  * when the source subvolume/snapshot had orphan items. The issue was that the
12551  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12552  * node instead of the post orphan cleanup root node.
12553  * So this function, and its callees, just detects and fixes those cases. Even
12554  * though the regression was for read-only snapshots, this function applies to
12555  * any snapshot/subvolume root.
12556  * This must be run before any other repair code - not doing it so, makes other
12557  * repair code delete or modify backrefs in the extent tree for example, which
12558  * will result in an inconsistent fs after repairing the root items.
12559  */
12560 static int repair_root_items(struct btrfs_fs_info *info)
12561 {
12562         struct btrfs_path path;
12563         struct btrfs_key key;
12564         struct extent_buffer *leaf;
12565         struct btrfs_trans_handle *trans = NULL;
12566         int ret = 0;
12567         int bad_roots = 0;
12568         int need_trans = 0;
12569
12570         btrfs_init_path(&path);
12571
12572         ret = build_roots_info_cache(info);
12573         if (ret)
12574                 goto out;
12575
12576         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12577         key.type = BTRFS_ROOT_ITEM_KEY;
12578         key.offset = 0;
12579
12580 again:
12581         /*
12582          * Avoid opening and committing transactions if a leaf doesn't have
12583          * any root items that need to be fixed, so that we avoid rotating
12584          * backup roots unnecessarily.
12585          */
12586         if (need_trans) {
12587                 trans = btrfs_start_transaction(info->tree_root, 1);
12588                 if (IS_ERR(trans)) {
12589                         ret = PTR_ERR(trans);
12590                         goto out;
12591                 }
12592         }
12593
12594         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12595                                 0, trans ? 1 : 0);
12596         if (ret < 0)
12597                 goto out;
12598         leaf = path.nodes[0];
12599
12600         while (1) {
12601                 struct btrfs_key found_key;
12602
12603                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12604                         int no_more_keys = find_next_key(&path, &key);
12605
12606                         btrfs_release_path(&path);
12607                         if (trans) {
12608                                 ret = btrfs_commit_transaction(trans,
12609                                                                info->tree_root);
12610                                 trans = NULL;
12611                                 if (ret < 0)
12612                                         goto out;
12613                         }
12614                         need_trans = 0;
12615                         if (no_more_keys)
12616                                 break;
12617                         goto again;
12618                 }
12619
12620                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12621
12622                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12623                         goto next;
12624                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12625                         goto next;
12626
12627                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12628                 if (ret < 0)
12629                         goto out;
12630                 if (ret) {
12631                         if (!trans && repair) {
12632                                 need_trans = 1;
12633                                 key = found_key;
12634                                 btrfs_release_path(&path);
12635                                 goto again;
12636                         }
12637                         bad_roots++;
12638                 }
12639 next:
12640                 path.slots[0]++;
12641         }
12642         ret = 0;
12643 out:
12644         free_roots_info_cache();
12645         btrfs_release_path(&path);
12646         if (trans)
12647                 btrfs_commit_transaction(trans, info->tree_root);
12648         if (ret < 0)
12649                 return ret;
12650
12651         return bad_roots;
12652 }
12653
12654 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12655 {
12656         struct btrfs_trans_handle *trans;
12657         struct btrfs_block_group_cache *bg_cache;
12658         u64 current = 0;
12659         int ret = 0;
12660
12661         /* Clear all free space cache inodes and its extent data */
12662         while (1) {
12663                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12664                 if (!bg_cache)
12665                         break;
12666                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12667                 if (ret < 0)
12668                         return ret;
12669                 current = bg_cache->key.objectid + bg_cache->key.offset;
12670         }
12671
12672         /* Don't forget to set cache_generation to -1 */
12673         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12674         if (IS_ERR(trans)) {
12675                 error("failed to update super block cache generation");
12676                 return PTR_ERR(trans);
12677         }
12678         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12679         btrfs_commit_transaction(trans, fs_info->tree_root);
12680
12681         return ret;
12682 }
12683
/*
 * Help text of "btrfs check".  The array is NULL terminated; the entries
 * are, in order: the synopsis, a one line summary, the long description
 * ended by an empty string, and one or more lines per option.
 */
const char *const cmd_check_usage[] = {
        /* Synopsis and one line summary */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* Long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",

        NULL
};
12714
12715 int cmd_check(int argc, char **argv)
12716 {
12717         struct cache_tree root_cache;
12718         struct btrfs_root *root;
12719         struct btrfs_fs_info *info;
12720         u64 bytenr = 0;
12721         u64 subvolid = 0;
12722         u64 tree_root_bytenr = 0;
12723         u64 chunk_root_bytenr = 0;
12724         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12725         int ret;
12726         int err = 0;
12727         u64 num;
12728         int init_csum_tree = 0;
12729         int readonly = 0;
12730         int clear_space_cache = 0;
12731         int qgroup_report = 0;
12732         int qgroups_repaired = 0;
12733         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12734
12735         while(1) {
12736                 int c;
12737                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12738                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12739                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12740                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12741                 static const struct option long_options[] = {
12742                         { "super", required_argument, NULL, 's' },
12743                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12744                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12745                         { "init-csum-tree", no_argument, NULL,
12746                                 GETOPT_VAL_INIT_CSUM },
12747                         { "init-extent-tree", no_argument, NULL,
12748                                 GETOPT_VAL_INIT_EXTENT },
12749                         { "check-data-csum", no_argument, NULL,
12750                                 GETOPT_VAL_CHECK_CSUM },
12751                         { "backup", no_argument, NULL, 'b' },
12752                         { "subvol-extents", required_argument, NULL, 'E' },
12753                         { "qgroup-report", no_argument, NULL, 'Q' },
12754                         { "tree-root", required_argument, NULL, 'r' },
12755                         { "chunk-root", required_argument, NULL,
12756                                 GETOPT_VAL_CHUNK_TREE },
12757                         { "progress", no_argument, NULL, 'p' },
12758                         { "mode", required_argument, NULL,
12759                                 GETOPT_VAL_MODE },
12760                         { "clear-space-cache", required_argument, NULL,
12761                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12762                         { NULL, 0, NULL, 0}
12763                 };
12764
12765                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12766                 if (c < 0)
12767                         break;
12768                 switch(c) {
12769                         case 'a': /* ignored */ break;
12770                         case 'b':
12771                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12772                                 break;
12773                         case 's':
12774                                 num = arg_strtou64(optarg);
12775                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12776                                         error(
12777                                         "super mirror should be less than %d",
12778                                                 BTRFS_SUPER_MIRROR_MAX);
12779                                         exit(1);
12780                                 }
12781                                 bytenr = btrfs_sb_offset(((int)num));
12782                                 printf("using SB copy %llu, bytenr %llu\n", num,
12783                                        (unsigned long long)bytenr);
12784                                 break;
12785                         case 'Q':
12786                                 qgroup_report = 1;
12787                                 break;
12788                         case 'E':
12789                                 subvolid = arg_strtou64(optarg);
12790                                 break;
12791                         case 'r':
12792                                 tree_root_bytenr = arg_strtou64(optarg);
12793                                 break;
12794                         case GETOPT_VAL_CHUNK_TREE:
12795                                 chunk_root_bytenr = arg_strtou64(optarg);
12796                                 break;
12797                         case 'p':
12798                                 ctx.progress_enabled = true;
12799                                 break;
12800                         case '?':
12801                         case 'h':
12802                                 usage(cmd_check_usage);
12803                         case GETOPT_VAL_REPAIR:
12804                                 printf("enabling repair mode\n");
12805                                 repair = 1;
12806                                 ctree_flags |= OPEN_CTREE_WRITES;
12807                                 break;
12808                         case GETOPT_VAL_READONLY:
12809                                 readonly = 1;
12810                                 break;
12811                         case GETOPT_VAL_INIT_CSUM:
12812                                 printf("Creating a new CRC tree\n");
12813                                 init_csum_tree = 1;
12814                                 repair = 1;
12815                                 ctree_flags |= OPEN_CTREE_WRITES;
12816                                 break;
12817                         case GETOPT_VAL_INIT_EXTENT:
12818                                 init_extent_tree = 1;
12819                                 ctree_flags |= (OPEN_CTREE_WRITES |
12820                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12821                                 repair = 1;
12822                                 break;
12823                         case GETOPT_VAL_CHECK_CSUM:
12824                                 check_data_csum = 1;
12825                                 break;
12826                         case GETOPT_VAL_MODE:
12827                                 check_mode = parse_check_mode(optarg);
12828                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12829                                         error("unknown mode: %s", optarg);
12830                                         exit(1);
12831                                 }
12832                                 break;
12833                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12834                                 if (strcmp(optarg, "v1") == 0) {
12835                                         clear_space_cache = 1;
12836                                 } else if (strcmp(optarg, "v2") == 0) {
12837                                         clear_space_cache = 2;
12838                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12839                                 } else {
12840                                         error(
12841                 "invalid argument to --clear-space-cache, must be v1 or v2");
12842                                         exit(1);
12843                                 }
12844                                 ctree_flags |= OPEN_CTREE_WRITES;
12845                                 break;
12846                 }
12847         }
12848
12849         if (check_argc_exact(argc - optind, 1))
12850                 usage(cmd_check_usage);
12851
12852         if (ctx.progress_enabled) {
12853                 ctx.tp = TASK_NOTHING;
12854                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12855         }
12856
12857         /* This check is the only reason for --readonly to exist */
12858         if (readonly && repair) {
12859                 error("repair options are not compatible with --readonly");
12860                 exit(1);
12861         }
12862
12863         /*
12864          * Not supported yet
12865          */
12866         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12867                 error("low memory mode doesn't support repair yet");
12868                 exit(1);
12869         }
12870
12871         radix_tree_init();
12872         cache_tree_init(&root_cache);
12873
12874         if((ret = check_mounted(argv[optind])) < 0) {
12875                 error("could not check mount status: %s", strerror(-ret));
12876                 err |= !!ret;
12877                 goto err_out;
12878         } else if(ret) {
12879                 error("%s is currently mounted, aborting", argv[optind]);
12880                 ret = -EBUSY;
12881                 err |= !!ret;
12882                 goto err_out;
12883         }
12884
12885         /* only allow partial opening under repair mode */
12886         if (repair)
12887                 ctree_flags |= OPEN_CTREE_PARTIAL;
12888
12889         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12890                                   chunk_root_bytenr, ctree_flags);
12891         if (!info) {
12892                 error("cannot open file system");
12893                 ret = -EIO;
12894                 err |= !!ret;
12895                 goto err_out;
12896         }
12897
12898         global_info = info;
12899         root = info->fs_root;
12900         if (clear_space_cache == 1) {
12901                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12902                         error(
12903                 "free space cache v2 detected, use --clear-space-cache v2");
12904                         ret = 1;
12905                         goto close_out;
12906                 }
12907                 printf("Clearing free space cache\n");
12908                 ret = clear_free_space_cache(info);
12909                 if (ret) {
12910                         error("failed to clear free space cache");
12911                         ret = 1;
12912                 } else {
12913                         printf("Free space cache cleared\n");
12914                 }
12915                 goto close_out;
12916         } else if (clear_space_cache == 2) {
12917                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12918                         printf("no free space cache v2 to clear\n");
12919                         ret = 0;
12920                         goto close_out;
12921                 }
12922                 printf("Clear free space cache v2\n");
12923                 ret = btrfs_clear_free_space_tree(info);
12924                 if (ret) {
12925                         error("failed to clear free space cache v2: %d", ret);
12926                         ret = 1;
12927                 } else {
12928                         printf("free space cache v2 cleared\n");
12929                 }
12930                 goto close_out;
12931         }
12932
12933         /*
12934          * repair mode will force us to commit transaction which
12935          * will make us fail to load log tree when mounting.
12936          */
12937         if (repair && btrfs_super_log_root(info->super_copy)) {
12938                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12939                 if (!ret) {
12940                         ret = 1;
12941                         err |= !!ret;
12942                         goto close_out;
12943                 }
12944                 ret = zero_log_tree(root);
12945                 err |= !!ret;
12946                 if (ret) {
12947                         error("failed to zero log tree: %d", ret);
12948                         goto close_out;
12949                 }
12950         }
12951
12952         uuid_unparse(info->super_copy->fsid, uuidbuf);
12953         if (qgroup_report) {
12954                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12955                        uuidbuf);
12956                 ret = qgroup_verify_all(info);
12957                 err |= !!ret;
12958                 if (ret == 0)
12959                         report_qgroups(1);
12960                 goto close_out;
12961         }
12962         if (subvolid) {
12963                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12964                        subvolid, argv[optind], uuidbuf);
12965                 ret = print_extent_state(info, subvolid);
12966                 err |= !!ret;
12967                 goto close_out;
12968         }
12969         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12970
12971         if (!extent_buffer_uptodate(info->tree_root->node) ||
12972             !extent_buffer_uptodate(info->dev_root->node) ||
12973             !extent_buffer_uptodate(info->chunk_root->node)) {
12974                 error("critical roots corrupted, unable to check the filesystem");
12975                 err |= !!ret;
12976                 ret = -EIO;
12977                 goto close_out;
12978         }
12979
12980         if (init_extent_tree || init_csum_tree) {
12981                 struct btrfs_trans_handle *trans;
12982
12983                 trans = btrfs_start_transaction(info->extent_root, 0);
12984                 if (IS_ERR(trans)) {
12985                         error("error starting transaction");
12986                         ret = PTR_ERR(trans);
12987                         err |= !!ret;
12988                         goto close_out;
12989                 }
12990
12991                 if (init_extent_tree) {
12992                         printf("Creating a new extent tree\n");
12993                         ret = reinit_extent_tree(trans, info);
12994                         err |= !!ret;
12995                         if (ret)
12996                                 goto close_out;
12997                 }
12998
12999                 if (init_csum_tree) {
13000                         printf("Reinitialize checksum tree\n");
13001                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13002                         if (ret) {
13003                                 error("checksum tree initialization failed: %d",
13004                                                 ret);
13005                                 ret = -EIO;
13006                                 err |= !!ret;
13007                                 goto close_out;
13008                         }
13009
13010                         ret = fill_csum_tree(trans, info->csum_root,
13011                                              init_extent_tree);
13012                         err |= !!ret;
13013                         if (ret) {
13014                                 error("checksum tree refilling failed: %d", ret);
13015                                 return -EIO;
13016                         }
13017                 }
13018                 /*
13019                  * Ok now we commit and run the normal fsck, which will add
13020                  * extent entries for all of the items it finds.
13021                  */
13022                 ret = btrfs_commit_transaction(trans, info->extent_root);
13023                 err |= !!ret;
13024                 if (ret)
13025                         goto close_out;
13026         }
13027         if (!extent_buffer_uptodate(info->extent_root->node)) {
13028                 error("critical: extent_root, unable to check the filesystem");
13029                 ret = -EIO;
13030                 err |= !!ret;
13031                 goto close_out;
13032         }
13033         if (!extent_buffer_uptodate(info->csum_root->node)) {
13034                 error("critical: csum_root, unable to check the filesystem");
13035                 ret = -EIO;
13036                 err |= !!ret;
13037                 goto close_out;
13038         }
13039
13040         if (!ctx.progress_enabled)
13041                 fprintf(stderr, "checking extents\n");
13042         if (check_mode == CHECK_MODE_LOWMEM)
13043                 ret = check_chunks_and_extents_v2(root);
13044         else
13045                 ret = check_chunks_and_extents(root);
13046         err |= !!ret;
13047         if (ret)
13048                 error(
13049                 "errors found in extent allocation tree or chunk allocation");
13050
13051         ret = repair_root_items(info);
13052         err |= !!ret;
13053         if (ret < 0) {
13054                 error("failed to repair root items: %s", strerror(-ret));
13055                 goto close_out;
13056         }
13057         if (repair) {
13058                 fprintf(stderr, "Fixed %d roots.\n", ret);
13059                 ret = 0;
13060         } else if (ret > 0) {
13061                 fprintf(stderr,
13062                        "Found %d roots with an outdated root item.\n",
13063                        ret);
13064                 fprintf(stderr,
13065                         "Please run a filesystem check with the option --repair to fix them.\n");
13066                 ret = 1;
13067                 err |= !!ret;
13068                 goto close_out;
13069         }
13070
13071         if (!ctx.progress_enabled) {
13072                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13073                         fprintf(stderr, "checking free space tree\n");
13074                 else
13075                         fprintf(stderr, "checking free space cache\n");
13076         }
13077         ret = check_space_cache(root);
13078         err |= !!ret;
13079         if (ret) {
13080                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13081                         error("errors found in free space tree");
13082                 else
13083                         error("errors found in free space cache");
13084                 goto out;
13085         }
13086
13087         /*
13088          * We used to have to have these hole extents in between our real
13089          * extents so if we don't have this flag set we need to make sure there
13090          * are no gaps in the file extents for inodes, otherwise we can just
13091          * ignore it when this happens.
13092          */
13093         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13094         if (!ctx.progress_enabled)
13095                 fprintf(stderr, "checking fs roots\n");
13096         if (check_mode == CHECK_MODE_LOWMEM)
13097                 ret = check_fs_roots_v2(root->fs_info);
13098         else
13099                 ret = check_fs_roots(root, &root_cache);
13100         err |= !!ret;
13101         if (ret) {
13102                 error("errors found in fs roots");
13103                 goto out;
13104         }
13105
13106         fprintf(stderr, "checking csums\n");
13107         ret = check_csums(root);
13108         err |= !!ret;
13109         if (ret) {
13110                 error("errors found in csum tree");
13111                 goto out;
13112         }
13113
13114         fprintf(stderr, "checking root refs\n");
13115         /* For low memory mode, check_fs_roots_v2 handles root refs */
13116         if (check_mode != CHECK_MODE_LOWMEM) {
13117                 ret = check_root_refs(root, &root_cache);
13118                 err |= !!ret;
13119                 if (ret) {
13120                         error("errors found in root refs");
13121                         goto out;
13122                 }
13123         }
13124
13125         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13126                 struct extent_buffer *eb;
13127
13128                 eb = list_first_entry(&root->fs_info->recow_ebs,
13129                                       struct extent_buffer, recow);
13130                 list_del_init(&eb->recow);
13131                 ret = recow_extent_buffer(root, eb);
13132                 err |= !!ret;
13133                 if (ret) {
13134                         error("fails to fix transid errors");
13135                         break;
13136                 }
13137         }
13138
13139         while (!list_empty(&delete_items)) {
13140                 struct bad_item *bad;
13141
13142                 bad = list_first_entry(&delete_items, struct bad_item, list);
13143                 list_del_init(&bad->list);
13144                 if (repair) {
13145                         ret = delete_bad_item(root, bad);
13146                         err |= !!ret;
13147                 }
13148                 free(bad);
13149         }
13150
13151         if (info->quota_enabled) {
13152                 fprintf(stderr, "checking quota groups\n");
13153                 ret = qgroup_verify_all(info);
13154                 err |= !!ret;
13155                 if (ret) {
13156                         error("failed to check quota groups");
13157                         goto out;
13158                 }
13159                 report_qgroups(0);
13160                 ret = repair_qgroups(info, &qgroups_repaired);
13161                 err |= !!ret;
13162                 if (err) {
13163                         error("failed to repair quota groups");
13164                         goto out;
13165                 }
13166                 ret = 0;
13167         }
13168
13169         if (!list_empty(&root->fs_info->recow_ebs)) {
13170                 error("transid errors in file system");
13171                 ret = 1;
13172                 err |= !!ret;
13173         }
13174 out:
13175         if (found_old_backref) { /*
13176                  * there was a disk format change when mixed
13177                  * backref was in testing tree. The old format
13178                  * existed about one week.
13179                  */
13180                 printf("\n * Found old mixed backref format. "
13181                        "The old format is not supported! *"
13182                        "\n * Please mount the FS in readonly mode, "
13183                        "backup data and re-format the FS. *\n\n");
13184                 err |= 1;
13185         }
13186         printf("found %llu bytes used, ",
13187                (unsigned long long)bytes_used);
13188         if (err)
13189                 printf("error(s) found\n");
13190         else
13191                 printf("no error found\n");
13192         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13193         printf("total tree bytes: %llu\n",
13194                (unsigned long long)total_btree_bytes);
13195         printf("total fs tree bytes: %llu\n",
13196                (unsigned long long)total_fs_tree_bytes);
13197         printf("total extent tree bytes: %llu\n",
13198                (unsigned long long)total_extent_tree_bytes);
13199         printf("btree space waste bytes: %llu\n",
13200                (unsigned long long)btree_space_waste);
13201         printf("file data blocks allocated: %llu\n referenced %llu\n",
13202                 (unsigned long long)data_bytes_allocated,
13203                 (unsigned long long)data_bytes_referenced);
13204
13205         free_qgroup_counts();
13206         free_root_recs_tree(&root_cache);
13207 close_out:
13208         close_ctree(root);
13209 err_out:
13210         if (ctx.progress_enabled)
13211                 task_deinit(ctx.info);
13212
13213         return err;
13214 }