c5faa2b3505cd1311a1046a3ed447353980ee11f
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Check stage shown by the status task; print_status_check() uses the
 * value as an index into its table of status strings.
 */
enum task_position {
        TASK_EXTENTS = 0,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /* Sentinel, not a real stage: must remain the last enumerator. */
        TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        /* Returned by parse_check_mode() for an unrecognized name. */
        CHECK_MODE_UNKNOWN,
        /* Alias, not a distinct mode. */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits; note that bit 0 is not assigned in this group. */
#define ROOT_DIR_ERROR          (1 << 1)        /* ROOT_DIR is bad */
#define DIR_ITEM_MISSING        (1 << 2)        /* no DIR_ITEM */
#define DIR_ITEM_MISMATCH       (1 << 3)        /* DIR_ITEM present, differs */
#define INODE_REF_MISSING       (1 << 4)        /* no INODE_REF/INODE_EXTREF */
#define INODE_ITEM_MISSING      (1 << 5)        /* no INODE_ITEM */
#define INODE_ITEM_MISMATCH     (1 << 6)        /* INODE_ITEM present, differs */
#define FILE_EXTENT_ERROR       (1 << 7)        /* FILE_EXTENT is bad */
#define ODD_CSUM_ITEM           (1 << 8)        /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1 << 9)        /* no CSUM_ITEM */
#define LINK_COUNT_ERROR        (1 << 10)       /* INODE_ITEM nlink is wrong */
#define NBYTES_ERROR            (1 << 11)       /* INODE_ITEM nbytes is wrong */
#define ISIZE_ERROR             (1 << 12)       /* INODE_ITEM size is wrong */
#define ORPHAN_ITEM             (1 << 13)       /* INODE_ITEM is unreferenced */
#define NO_INODE_ITEM           (1 << 14)       /* no inode_item */
#define LAST_ITEM               (1 << 15)       /* tree traversal is complete */
#define ROOT_REF_MISSING        (1 << 16)       /* no ROOT_REF */
#define ROOT_REF_MISMATCH       (1 << 17)       /* ROOT_REF present, differs */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Value used to explicitly initialize extent_record::flag_block_full_backref.
 * It needs two bits, which is why that bit-field is two bits wide.
 */
enum {
        FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Bits of the backref "errors" fields, decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8)        /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Bits of inode_record::errors, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8)        /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10)       /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * No caller interprets the individual bits yet; they are only used
 * internally to classify errors.  Every flag must own a distinct bit so
 * that the classes stay distinguishable once they are or-ed together.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
/*
 * Used to share bit 4 with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable; it now has a bit of its own.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Finish the status output: emit a newline and flush stdout.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        const char newline[] = "\n";

        printf("%s", newline);
        fflush(stdout);
        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
/* Forward declaration; the definition is not part of this excerpt. */
static void
reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize,
833                                          root->fs_info->sectorsize));
834         }
835 }
836
837 static void print_ref_error(int errors)
838 {
839         if (errors & REF_ERR_NO_DIR_ITEM)
840                 fprintf(stderr, ", no dir item");
841         if (errors & REF_ERR_NO_DIR_INDEX)
842                 fprintf(stderr, ", no dir index");
843         if (errors & REF_ERR_NO_INODE_REF)
844                 fprintf(stderr, ", no inode ref");
845         if (errors & REF_ERR_DUP_DIR_ITEM)
846                 fprintf(stderr, ", dup dir item");
847         if (errors & REF_ERR_DUP_DIR_INDEX)
848                 fprintf(stderr, ", dup dir index");
849         if (errors & REF_ERR_DUP_INODE_REF)
850                 fprintf(stderr, ", dup inode ref");
851         if (errors & REF_ERR_INDEX_UNMATCH)
852                 fprintf(stderr, ", index mismatch");
853         if (errors & REF_ERR_FILETYPE_UNMATCH)
854                 fprintf(stderr, ", filetype mismatch");
855         if (errors & REF_ERR_NAME_TOO_LONG)
856                 fprintf(stderr, ", name too long");
857         if (errors & REF_ERR_NO_ROOT_REF)
858                 fprintf(stderr, ", no root ref");
859         if (errors & REF_ERR_NO_ROOT_BACKREF)
860                 fprintf(stderr, ", no root backref");
861         if (errors & REF_ERR_DUP_ROOT_REF)
862                 fprintf(stderr, ", dup root ref");
863         if (errors & REF_ERR_DUP_ROOT_BACKREF)
864                 fprintf(stderr, ", dup root backref");
865         fprintf(stderr, "\n");
866 }
867
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869                                           u64 ino, int mod)
870 {
871         struct ptr_node *node;
872         struct cache_extent *cache;
873         struct inode_record *rec = NULL;
874         int ret;
875
876         cache = lookup_cache_extent(inode_cache, ino, 1);
877         if (cache) {
878                 node = container_of(cache, struct ptr_node, cache);
879                 rec = node->data;
880                 if (mod && rec->refs > 1) {
881                         node->data = clone_inode_rec(rec);
882                         if (IS_ERR(node->data))
883                                 return node->data;
884                         rec->refs--;
885                         rec = node->data;
886                 }
887         } else if (mod) {
888                 rec = calloc(1, sizeof(*rec));
889                 if (!rec)
890                         return ERR_PTR(-ENOMEM);
891                 rec->ino = ino;
892                 rec->extent_start = (u64)-1;
893                 rec->refs = 1;
894                 INIT_LIST_HEAD(&rec->backrefs);
895                 INIT_LIST_HEAD(&rec->orphan_extents);
896                 rec->holes = RB_ROOT;
897
898                 node = malloc(sizeof(*node));
899                 if (!node) {
900                         free(rec);
901                         return ERR_PTR(-ENOMEM);
902                 }
903                 node->cache.start = ino;
904                 node->cache.size = 1;
905                 node->data = rec;
906
907                 if (ino == BTRFS_FREE_INO_OBJECTID)
908                         rec->found_link = 1;
909
910                 ret = insert_cache_extent(inode_cache, &node->cache);
911                 if (ret)
912                         return ERR_PTR(-EEXIST);
913         }
914         return rec;
915 }
916
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 {
919         struct orphan_data_extent *orphan;
920
921         while (!list_empty(orphan_extents)) {
922                 orphan = list_entry(orphan_extents->next,
923                                     struct orphan_data_extent, list);
924                 list_del(&orphan->list);
925                 free(orphan);
926         }
927 }
928
929 static void free_inode_rec(struct inode_record *rec)
930 {
931         struct inode_backref *backref;
932
933         if (--rec->refs > 0)
934                 return;
935
936         while (!list_empty(&rec->backrefs)) {
937                 backref = to_inode_backref(rec->backrefs.next);
938                 list_del(&backref->list);
939                 free(backref);
940         }
941         free_orphan_data_extents(&rec->orphan_extents);
942         free_file_extent_holes(&rec->holes);
943         free(rec);
944 }
945
946 static int can_free_inode_rec(struct inode_record *rec)
947 {
948         if (!rec->errors && rec->checked && rec->found_inode_item &&
949             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
950                 return 1;
951         return 0;
952 }
953
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955                                  struct inode_record *rec)
956 {
957         struct cache_extent *cache;
958         struct inode_backref *tmp, *backref;
959         struct ptr_node *node;
960         u8 filetype;
961
962         if (!rec->found_inode_item)
963                 return;
964
965         filetype = imode_to_type(rec->imode);
966         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967                 if (backref->found_dir_item && backref->found_dir_index) {
968                         if (backref->filetype != filetype)
969                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970                         if (!backref->errors && backref->found_inode_ref &&
971                             rec->nlink == rec->found_link) {
972                                 list_del(&backref->list);
973                                 free(backref);
974                         }
975                 }
976         }
977
978         if (!rec->checked || rec->merging)
979                 return;
980
981         if (S_ISDIR(rec->imode)) {
982                 if (rec->found_size != rec->isize)
983                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984                 if (rec->found_file_extent)
985                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
986         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987                 if (rec->found_dir_item)
988                         rec->errors |= I_ERR_ODD_DIR_ITEM;
989                 if (rec->found_size != rec->nbytes)
990                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991                 if (rec->nlink > 0 && !no_holes &&
992                     (rec->extent_end < rec->isize ||
993                      first_extent_gap(&rec->holes) < rec->isize))
994                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995         }
996
997         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998                 if (rec->found_csum_item && rec->nodatasum)
999                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000                 if (rec->some_csum_missing && !rec->nodatasum)
1001                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002         }
1003
1004         BUG_ON(rec->refs != 1);
1005         if (can_free_inode_rec(rec)) {
1006                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007                 node = container_of(cache, struct ptr_node, cache);
1008                 BUG_ON(node->data != rec);
1009                 remove_cache_extent(inode_cache, &node->cache);
1010                 free(node);
1011                 free_inode_rec(rec);
1012         }
1013 }
1014
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 {
1017         struct btrfs_path path;
1018         struct btrfs_key key;
1019         int ret;
1020
1021         key.objectid = BTRFS_ORPHAN_OBJECTID;
1022         key.type = BTRFS_ORPHAN_ITEM_KEY;
1023         key.offset = ino;
1024
1025         btrfs_init_path(&path);
1026         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027         btrfs_release_path(&path);
1028         if (ret > 0)
1029                 ret = -ENOENT;
1030         return ret;
1031 }
1032
1033 static int process_inode_item(struct extent_buffer *eb,
1034                               int slot, struct btrfs_key *key,
1035                               struct shared_node *active_node)
1036 {
1037         struct inode_record *rec;
1038         struct btrfs_inode_item *item;
1039
1040         rec = active_node->current;
1041         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042         if (rec->found_inode_item) {
1043                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044                 return 1;
1045         }
1046         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047         rec->nlink = btrfs_inode_nlink(eb, item);
1048         rec->isize = btrfs_inode_size(eb, item);
1049         rec->nbytes = btrfs_inode_nbytes(eb, item);
1050         rec->imode = btrfs_inode_mode(eb, item);
1051         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052                 rec->nodatasum = 1;
1053         rec->found_inode_item = 1;
1054         if (rec->nlink == 0)
1055                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056         maybe_free_inode_rec(&active_node->inode_cache, rec);
1057         return 0;
1058 }
1059
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061                                                 const char *name,
1062                                                 int namelen, u64 dir)
1063 {
1064         struct inode_backref *backref;
1065
1066         list_for_each_entry(backref, &rec->backrefs, list) {
1067                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068                         break;
1069                 if (backref->dir != dir || backref->namelen != namelen)
1070                         continue;
1071                 if (memcmp(name, backref->name, namelen))
1072                         continue;
1073                 return backref;
1074         }
1075
1076         backref = malloc(sizeof(*backref) + namelen + 1);
1077         if (!backref)
1078                 return NULL;
1079         memset(backref, 0, sizeof(*backref));
1080         backref->dir = dir;
1081         backref->namelen = namelen;
1082         memcpy(backref->name, name, namelen);
1083         backref->name[namelen] = '\0';
1084         list_add_tail(&backref->list, &rec->backrefs);
1085         return backref;
1086 }
1087
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089                              u64 ino, u64 dir, u64 index,
1090                              const char *name, int namelen,
1091                              u8 filetype, u8 itemtype, int errors)
1092 {
1093         struct inode_record *rec;
1094         struct inode_backref *backref;
1095
1096         rec = get_inode_rec(inode_cache, ino, 1);
1097         BUG_ON(IS_ERR(rec));
1098         backref = get_inode_backref(rec, name, namelen, dir);
1099         BUG_ON(!backref);
1100         if (errors)
1101                 backref->errors |= errors;
1102         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103                 if (backref->found_dir_index)
1104                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105                 if (backref->found_inode_ref && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 if (backref->found_dir_item && backref->filetype != filetype)
1108                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109
1110                 backref->index = index;
1111                 backref->filetype = filetype;
1112                 backref->found_dir_index = 1;
1113         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114                 rec->found_link++;
1115                 if (backref->found_dir_item)
1116                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117                 if (backref->found_dir_index && backref->filetype != filetype)
1118                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119
1120                 backref->filetype = filetype;
1121                 backref->found_dir_item = 1;
1122         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124                 if (backref->found_inode_ref)
1125                         backref->errors |= REF_ERR_DUP_INODE_REF;
1126                 if (backref->found_dir_index && backref->index != index)
1127                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1128                 else
1129                         backref->index = index;
1130
1131                 backref->ref_type = itemtype;
1132                 backref->found_inode_ref = 1;
1133         } else {
1134                 BUG_ON(1);
1135         }
1136
1137         maybe_free_inode_rec(inode_cache, rec);
1138         return 0;
1139 }
1140
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142                             struct cache_tree *dst_cache)
1143 {
1144         struct inode_backref *backref;
1145         u32 dir_count = 0;
1146         int ret = 0;
1147
1148         dst->merging = 1;
1149         list_for_each_entry(backref, &src->backrefs, list) {
1150                 if (backref->found_dir_index) {
1151                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1152                                         backref->index, backref->name,
1153                                         backref->namelen, backref->filetype,
1154                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1155                 }
1156                 if (backref->found_dir_item) {
1157                         dir_count++;
1158                         add_inode_backref(dst_cache, dst->ino,
1159                                         backref->dir, 0, backref->name,
1160                                         backref->namelen, backref->filetype,
1161                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1162                 }
1163                 if (backref->found_inode_ref) {
1164                         add_inode_backref(dst_cache, dst->ino,
1165                                         backref->dir, backref->index,
1166                                         backref->name, backref->namelen, 0,
1167                                         backref->ref_type, backref->errors);
1168                 }
1169         }
1170
1171         if (src->found_dir_item)
1172                 dst->found_dir_item = 1;
1173         if (src->found_file_extent)
1174                 dst->found_file_extent = 1;
1175         if (src->found_csum_item)
1176                 dst->found_csum_item = 1;
1177         if (src->some_csum_missing)
1178                 dst->some_csum_missing = 1;
1179         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1181                 if (ret < 0)
1182                         return ret;
1183         }
1184
1185         BUG_ON(src->found_link < dir_count);
1186         dst->found_link += src->found_link - dir_count;
1187         dst->found_size += src->found_size;
1188         if (src->extent_start != (u64)-1) {
1189                 if (dst->extent_start == (u64)-1) {
1190                         dst->extent_start = src->extent_start;
1191                         dst->extent_end = src->extent_end;
1192                 } else {
1193                         if (dst->extent_end > src->extent_start)
1194                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195                         else if (dst->extent_end < src->extent_start) {
1196                                 ret = add_file_extent_hole(&dst->holes,
1197                                         dst->extent_end,
1198                                         src->extent_start - dst->extent_end);
1199                         }
1200                         if (dst->extent_end < src->extent_end)
1201                                 dst->extent_end = src->extent_end;
1202                 }
1203         }
1204
1205         dst->errors |= src->errors;
1206         if (src->found_inode_item) {
1207                 if (!dst->found_inode_item) {
1208                         dst->nlink = src->nlink;
1209                         dst->isize = src->isize;
1210                         dst->nbytes = src->nbytes;
1211                         dst->imode = src->imode;
1212                         dst->nodatasum = src->nodatasum;
1213                         dst->found_inode_item = 1;
1214                 } else {
1215                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1216                 }
1217         }
1218         dst->merging = 0;
1219
1220         return 0;
1221 }
1222
1223 static int splice_shared_node(struct shared_node *src_node,
1224                               struct shared_node *dst_node)
1225 {
1226         struct cache_extent *cache;
1227         struct ptr_node *node, *ins;
1228         struct cache_tree *src, *dst;
1229         struct inode_record *rec, *conflict;
1230         u64 current_ino = 0;
1231         int splice = 0;
1232         int ret;
1233
1234         if (--src_node->refs == 0)
1235                 splice = 1;
1236         if (src_node->current)
1237                 current_ino = src_node->current->ino;
1238
1239         src = &src_node->root_cache;
1240         dst = &dst_node->root_cache;
1241 again:
1242         cache = search_cache_extent(src, 0);
1243         while (cache) {
1244                 node = container_of(cache, struct ptr_node, cache);
1245                 rec = node->data;
1246                 cache = next_cache_extent(cache);
1247
1248                 if (splice) {
1249                         remove_cache_extent(src, &node->cache);
1250                         ins = node;
1251                 } else {
1252                         ins = malloc(sizeof(*ins));
1253                         BUG_ON(!ins);
1254                         ins->cache.start = node->cache.start;
1255                         ins->cache.size = node->cache.size;
1256                         ins->data = rec;
1257                         rec->refs++;
1258                 }
1259                 ret = insert_cache_extent(dst, &ins->cache);
1260                 if (ret == -EEXIST) {
1261                         conflict = get_inode_rec(dst, rec->ino, 1);
1262                         BUG_ON(IS_ERR(conflict));
1263                         merge_inode_recs(rec, conflict, dst);
1264                         if (rec->checked) {
1265                                 conflict->checked = 1;
1266                                 if (dst_node->current == conflict)
1267                                         dst_node->current = NULL;
1268                         }
1269                         maybe_free_inode_rec(dst, conflict);
1270                         free_inode_rec(rec);
1271                         free(ins);
1272                 } else {
1273                         BUG_ON(ret);
1274                 }
1275         }
1276
1277         if (src == &src_node->root_cache) {
1278                 src = &src_node->inode_cache;
1279                 dst = &dst_node->inode_cache;
1280                 goto again;
1281         }
1282
1283         if (current_ino > 0 && (!dst_node->current ||
1284             current_ino > dst_node->current->ino)) {
1285                 if (dst_node->current) {
1286                         dst_node->current->checked = 1;
1287                         maybe_free_inode_rec(dst, dst_node->current);
1288                 }
1289                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290                 BUG_ON(IS_ERR(dst_node->current));
1291         }
1292         return 0;
1293 }
1294
1295 static void free_inode_ptr(struct cache_extent *cache)
1296 {
1297         struct ptr_node *node;
1298         struct inode_record *rec;
1299
1300         node = container_of(cache, struct ptr_node, cache);
1301         rec = node->data;
1302         free_inode_rec(rec);
1303         free(node);
1304 }
1305
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309                                             u64 bytenr)
1310 {
1311         struct cache_extent *cache;
1312         struct shared_node *node;
1313
1314         cache = lookup_cache_extent(shared, bytenr, 1);
1315         if (cache) {
1316                 node = container_of(cache, struct shared_node, cache);
1317                 return node;
1318         }
1319         return NULL;
1320 }
1321
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 {
1324         int ret;
1325         struct shared_node *node;
1326
1327         node = calloc(1, sizeof(*node));
1328         if (!node)
1329                 return -ENOMEM;
1330         node->cache.start = bytenr;
1331         node->cache.size = 1;
1332         cache_tree_init(&node->root_cache);
1333         cache_tree_init(&node->inode_cache);
1334         node->refs = refs;
1335
1336         ret = insert_cache_extent(shared, &node->cache);
1337
1338         return ret;
1339 }
1340
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342                              struct walk_control *wc, int level)
1343 {
1344         struct shared_node *node;
1345         struct shared_node *dest;
1346         int ret;
1347
1348         if (level == wc->active_node)
1349                 return 0;
1350
1351         BUG_ON(wc->active_node <= level);
1352         node = find_shared_node(&wc->shared, bytenr);
1353         if (!node) {
1354                 ret = add_shared_node(&wc->shared, bytenr, refs);
1355                 BUG_ON(ret);
1356                 node = find_shared_node(&wc->shared, bytenr);
1357                 wc->nodes[level] = node;
1358                 wc->active_node = level;
1359                 return 0;
1360         }
1361
1362         if (wc->root_level == wc->active_node &&
1363             btrfs_root_refs(&root->root_item) == 0) {
1364                 if (--node->refs == 0) {
1365                         free_inode_recs_tree(&node->root_cache);
1366                         free_inode_recs_tree(&node->inode_cache);
1367                         remove_cache_extent(&wc->shared, &node->cache);
1368                         free(node);
1369                 }
1370                 return 1;
1371         }
1372
1373         dest = wc->nodes[wc->active_node];
1374         splice_shared_node(node, dest);
1375         if (node->refs == 0) {
1376                 remove_cache_extent(&wc->shared, &node->cache);
1377                 free(node);
1378         }
1379         return 1;
1380 }
1381
1382 static int leave_shared_node(struct btrfs_root *root,
1383                              struct walk_control *wc, int level)
1384 {
1385         struct shared_node *node;
1386         struct shared_node *dest;
1387         int i;
1388
1389         if (level == wc->root_level)
1390                 return 0;
1391
1392         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1393                 if (wc->nodes[i])
1394                         break;
1395         }
1396         BUG_ON(i >= BTRFS_MAX_LEVEL);
1397
1398         node = wc->nodes[wc->active_node];
1399         wc->nodes[wc->active_node] = NULL;
1400         wc->active_node = i;
1401
1402         dest = wc->nodes[wc->active_node];
1403         if (wc->active_node < wc->root_level ||
1404             btrfs_root_refs(&root->root_item) > 0) {
1405                 BUG_ON(node->refs <= 1);
1406                 splice_shared_node(node, dest);
1407         } else {
1408                 BUG_ON(node->refs < 2);
1409                 node->refs--;
1410         }
1411         return 0;
1412 }
1413
1414 /*
1415  * Returns:
1416  * < 0 - on error
1417  * 1   - if the root with id child_root_id is a child of root parent_root_id
1418  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1419  *       has other root(s) as parent(s)
1420  * 2   - if the root child_root_id doesn't have any parent roots
1421  */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423                          u64 child_root_id)
1424 {
1425         struct btrfs_path path;
1426         struct btrfs_key key;
1427         struct extent_buffer *leaf;
1428         int has_parent = 0;
1429         int ret;
1430
1431         btrfs_init_path(&path);
1432
1433         key.objectid = parent_root_id;
1434         key.type = BTRFS_ROOT_REF_KEY;
1435         key.offset = child_root_id;
1436         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1437                                 0, 0);
1438         if (ret < 0)
1439                 return ret;
1440         btrfs_release_path(&path);
1441         if (!ret)
1442                 return 1;
1443
1444         key.objectid = child_root_id;
1445         key.type = BTRFS_ROOT_BACKREF_KEY;
1446         key.offset = 0;
1447         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1448                                 0, 0);
1449         if (ret < 0)
1450                 goto out;
1451
1452         while (1) {
1453                 leaf = path.nodes[0];
1454                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456                         if (ret)
1457                                 break;
1458                         leaf = path.nodes[0];
1459                 }
1460
1461                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462                 if (key.objectid != child_root_id ||
1463                     key.type != BTRFS_ROOT_BACKREF_KEY)
1464                         break;
1465
1466                 has_parent = 1;
1467
1468                 if (key.offset == parent_root_id) {
1469                         btrfs_release_path(&path);
1470                         return 1;
1471                 }
1472
1473                 path.slots[0]++;
1474         }
1475 out:
1476         btrfs_release_path(&path);
1477         if (ret < 0)
1478                 return ret;
1479         return has_parent ? 0 : 2;
1480 }
1481
1482 static int process_dir_item(struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (cur + sizeof(*di) + name_len > total ||
1517                     name_len > BTRFS_NAME_LEN) {
1518                         error = REF_ERR_NAME_TOO_LONG;
1519
1520                         if (cur + sizeof(*di) > total)
1521                                 break;
1522                         len = min_t(u32, total - cur - sizeof(*di),
1523                                     BTRFS_NAME_LEN);
1524                 } else {
1525                         len = name_len;
1526                         error = 0;
1527                 }
1528
1529                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530
1531                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1532                         add_inode_backref(inode_cache, location.objectid,
1533                                           key->objectid, key->offset, namebuf,
1534                                           len, filetype, key->type, error);
1535                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1536                         add_inode_backref(root_cache, location.objectid,
1537                                           key->objectid, key->offset,
1538                                           namebuf, len, filetype,
1539                                           key->type, error);
1540                 } else {
1541                         fprintf(stderr, "invalid location in dir item %u\n",
1542                                 location.type);
1543                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1544                                           key->objectid, key->offset, namebuf,
1545                                           len, filetype, key->type, error);
1546                 }
1547
1548                 len = sizeof(*di) + name_len + data_len;
1549                 di = (struct btrfs_dir_item *)((char *)di + len);
1550                 cur += len;
1551         }
1552         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1553                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1554
1555         return 0;
1556 }
1557
1558 static int process_inode_ref(struct extent_buffer *eb,
1559                              int slot, struct btrfs_key *key,
1560                              struct shared_node *active_node)
1561 {
1562         u32 total;
1563         u32 cur = 0;
1564         u32 len;
1565         u32 name_len;
1566         u64 index;
1567         int error;
1568         struct cache_tree *inode_cache;
1569         struct btrfs_inode_ref *ref;
1570         char namebuf[BTRFS_NAME_LEN];
1571
1572         inode_cache = &active_node->inode_cache;
1573
1574         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1575         total = btrfs_item_size_nr(eb, slot);
1576         while (cur < total) {
1577                 name_len = btrfs_inode_ref_name_len(eb, ref);
1578                 index = btrfs_inode_ref_index(eb, ref);
1579
1580                 /* inode_ref + namelen should not cross item boundary */
1581                 if (cur + sizeof(*ref) + name_len > total ||
1582                     name_len > BTRFS_NAME_LEN) {
1583                         if (total < cur + sizeof(*ref))
1584                                 break;
1585
1586                         /* Still try to read out the remaining part */
1587                         len = min_t(u32, total - cur - sizeof(*ref),
1588                                     BTRFS_NAME_LEN);
1589                         error = REF_ERR_NAME_TOO_LONG;
1590                 } else {
1591                         len = name_len;
1592                         error = 0;
1593                 }
1594
1595                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1596                 add_inode_backref(inode_cache, key->objectid, key->offset,
1597                                   index, namebuf, len, 0, key->type, error);
1598
1599                 len = sizeof(*ref) + name_len;
1600                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1601                 cur += len;
1602         }
1603         return 0;
1604 }
1605
1606 static int process_inode_extref(struct extent_buffer *eb,
1607                                 int slot, struct btrfs_key *key,
1608                                 struct shared_node *active_node)
1609 {
1610         u32 total;
1611         u32 cur = 0;
1612         u32 len;
1613         u32 name_len;
1614         u64 index;
1615         u64 parent;
1616         int error;
1617         struct cache_tree *inode_cache;
1618         struct btrfs_inode_extref *extref;
1619         char namebuf[BTRFS_NAME_LEN];
1620
1621         inode_cache = &active_node->inode_cache;
1622
1623         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1624         total = btrfs_item_size_nr(eb, slot);
1625         while (cur < total) {
1626                 name_len = btrfs_inode_extref_name_len(eb, extref);
1627                 index = btrfs_inode_extref_index(eb, extref);
1628                 parent = btrfs_inode_extref_parent(eb, extref);
1629                 if (name_len <= BTRFS_NAME_LEN) {
1630                         len = name_len;
1631                         error = 0;
1632                 } else {
1633                         len = BTRFS_NAME_LEN;
1634                         error = REF_ERR_NAME_TOO_LONG;
1635                 }
1636                 read_extent_buffer(eb, namebuf,
1637                                    (unsigned long)(extref + 1), len);
1638                 add_inode_backref(inode_cache, key->objectid, parent,
1639                                   index, namebuf, len, 0, key->type, error);
1640
1641                 len = sizeof(*extref) + name_len;
1642                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1643                 cur += len;
1644         }
1645         return 0;
1646
1647 }
1648
1649 static int count_csum_range(struct btrfs_root *root, u64 start,
1650                             u64 len, u64 *found)
1651 {
1652         struct btrfs_key key;
1653         struct btrfs_path path;
1654         struct extent_buffer *leaf;
1655         int ret;
1656         size_t size;
1657         *found = 0;
1658         u64 csum_end;
1659         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1660
1661         btrfs_init_path(&path);
1662
1663         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1664         key.offset = start;
1665         key.type = BTRFS_EXTENT_CSUM_KEY;
1666
1667         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1668                                 &key, &path, 0, 0);
1669         if (ret < 0)
1670                 goto out;
1671         if (ret > 0 && path.slots[0] > 0) {
1672                 leaf = path.nodes[0];
1673                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1674                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1675                     key.type == BTRFS_EXTENT_CSUM_KEY)
1676                         path.slots[0]--;
1677         }
1678
1679         while (len > 0) {
1680                 leaf = path.nodes[0];
1681                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1682                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1683                         if (ret > 0)
1684                                 break;
1685                         else if (ret < 0)
1686                                 goto out;
1687                         leaf = path.nodes[0];
1688                 }
1689
1690                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1691                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1692                     key.type != BTRFS_EXTENT_CSUM_KEY)
1693                         break;
1694
1695                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1696                 if (key.offset >= start + len)
1697                         break;
1698
1699                 if (key.offset > start)
1700                         start = key.offset;
1701
1702                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1703                 csum_end = key.offset + (size / csum_size) *
1704                            root->fs_info->sectorsize;
1705                 if (csum_end > start) {
1706                         size = min(csum_end - start, len);
1707                         len -= size;
1708                         start += size;
1709                         *found += size;
1710                 }
1711
1712                 path.slots[0]++;
1713         }
1714 out:
1715         btrfs_release_path(&path);
1716         if (ret < 0)
1717                 return ret;
1718         return 0;
1719 }
1720
1721 static int process_file_extent(struct btrfs_root *root,
1722                                 struct extent_buffer *eb,
1723                                 int slot, struct btrfs_key *key,
1724                                 struct shared_node *active_node)
1725 {
1726         struct inode_record *rec;
1727         struct btrfs_file_extent_item *fi;
1728         u64 num_bytes = 0;
1729         u64 disk_bytenr = 0;
1730         u64 extent_offset = 0;
1731         u64 mask = root->fs_info->sectorsize - 1;
1732         int extent_type;
1733         int ret;
1734
1735         rec = active_node->current;
1736         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1737         rec->found_file_extent = 1;
1738
1739         if (rec->extent_start == (u64)-1) {
1740                 rec->extent_start = key->offset;
1741                 rec->extent_end = key->offset;
1742         }
1743
1744         if (rec->extent_end > key->offset)
1745                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1746         else if (rec->extent_end < key->offset) {
1747                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1748                                            key->offset - rec->extent_end);
1749                 if (ret < 0)
1750                         return ret;
1751         }
1752
1753         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1754         extent_type = btrfs_file_extent_type(eb, fi);
1755
1756         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1757                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1758                 if (num_bytes == 0)
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 rec->found_size += num_bytes;
1761                 num_bytes = (num_bytes + mask) & ~mask;
1762         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1763                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1764                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1765                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1766                 extent_offset = btrfs_file_extent_offset(eb, fi);
1767                 if (num_bytes == 0 || (num_bytes & mask))
1768                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1769                 if (num_bytes + extent_offset >
1770                     btrfs_file_extent_ram_bytes(eb, fi))
1771                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1772                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1773                     (btrfs_file_extent_compression(eb, fi) ||
1774                      btrfs_file_extent_encryption(eb, fi) ||
1775                      btrfs_file_extent_other_encoding(eb, fi)))
1776                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777                 if (disk_bytenr > 0)
1778                         rec->found_size += num_bytes;
1779         } else {
1780                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1781         }
1782         rec->extent_end = key->offset + num_bytes;
1783
1784         /*
1785          * The data reloc tree will copy full extents into its inode and then
1786          * copy the corresponding csums.  Because the extent it copied could be
1787          * a preallocated extent that hasn't been written to yet there may be no
1788          * csums to copy, ergo we won't have csums for our file extent.  This is
1789          * ok so just don't bother checking csums if the inode belongs to the
1790          * data reloc tree.
1791          */
1792         if (disk_bytenr > 0 &&
1793             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1794                 u64 found;
1795                 if (btrfs_file_extent_compression(eb, fi))
1796                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1797                 else
1798                         disk_bytenr += extent_offset;
1799
1800                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1801                 if (ret < 0)
1802                         return ret;
1803                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1804                         if (found > 0)
1805                                 rec->found_csum_item = 1;
1806                         if (found < num_bytes)
1807                                 rec->some_csum_missing = 1;
1808                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1809                         if (found > 0)
1810                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1811                 }
1812         }
1813         return 0;
1814 }
1815
1816 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1817                             struct walk_control *wc)
1818 {
1819         struct btrfs_key key;
1820         u32 nritems;
1821         int i;
1822         int ret = 0;
1823         struct cache_tree *inode_cache;
1824         struct shared_node *active_node;
1825
1826         if (wc->root_level == wc->active_node &&
1827             btrfs_root_refs(&root->root_item) == 0)
1828                 return 0;
1829
1830         active_node = wc->nodes[wc->active_node];
1831         inode_cache = &active_node->inode_cache;
1832         nritems = btrfs_header_nritems(eb);
1833         for (i = 0; i < nritems; i++) {
1834                 btrfs_item_key_to_cpu(eb, &key, i);
1835
1836                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1837                         continue;
1838                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1839                         continue;
1840
1841                 if (active_node->current == NULL ||
1842                     active_node->current->ino < key.objectid) {
1843                         if (active_node->current) {
1844                                 active_node->current->checked = 1;
1845                                 maybe_free_inode_rec(inode_cache,
1846                                                      active_node->current);
1847                         }
1848                         active_node->current = get_inode_rec(inode_cache,
1849                                                              key.objectid, 1);
1850                         BUG_ON(IS_ERR(active_node->current));
1851                 }
1852                 switch (key.type) {
1853                 case BTRFS_DIR_ITEM_KEY:
1854                 case BTRFS_DIR_INDEX_KEY:
1855                         ret = process_dir_item(eb, i, &key, active_node);
1856                         break;
1857                 case BTRFS_INODE_REF_KEY:
1858                         ret = process_inode_ref(eb, i, &key, active_node);
1859                         break;
1860                 case BTRFS_INODE_EXTREF_KEY:
1861                         ret = process_inode_extref(eb, i, &key, active_node);
1862                         break;
1863                 case BTRFS_INODE_ITEM_KEY:
1864                         ret = process_inode_item(eb, i, &key, active_node);
1865                         break;
1866                 case BTRFS_EXTENT_DATA_KEY:
1867                         ret = process_file_extent(root, eb, i, &key,
1868                                                   active_node);
1869                         break;
1870                 default:
1871                         break;
1872                 };
1873         }
1874         return ret;
1875 }
1876
1877 struct node_refs {
1878         u64 bytenr[BTRFS_MAX_LEVEL];
1879         u64 refs[BTRFS_MAX_LEVEL];
1880         int need_check[BTRFS_MAX_LEVEL];
1881 };
1882
1883 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1884                              struct node_refs *nrefs, u64 level);
1885 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1886                             unsigned int ext_ref);
1887
1888 /*
1889  * Returns >0  Found error, not fatal, should continue
1890  * Returns <0  Fatal error, must exit the whole check
1891  * Returns 0   No errors found
1892  */
1893 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1894                                struct node_refs *nrefs, int *level, int ext_ref)
1895 {
1896         struct extent_buffer *cur = path->nodes[0];
1897         struct btrfs_key key;
1898         u64 cur_bytenr;
1899         u32 nritems;
1900         u64 first_ino = 0;
1901         int root_level = btrfs_header_level(root->node);
1902         int i;
1903         int ret = 0; /* Final return value */
1904         int err = 0; /* Positive error bitmap */
1905
1906         cur_bytenr = cur->start;
1907
1908         /* skip to first inode item or the first inode number change */
1909         nritems = btrfs_header_nritems(cur);
1910         for (i = 0; i < nritems; i++) {
1911                 btrfs_item_key_to_cpu(cur, &key, i);
1912                 if (i == 0)
1913                         first_ino = key.objectid;
1914                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1915                     (first_ino && first_ino != key.objectid))
1916                         break;
1917         }
1918         if (i == nritems) {
1919                 path->slots[0] = nritems;
1920                 return 0;
1921         }
1922         path->slots[0] = i;
1923
1924 again:
1925         err |= check_inode_item(root, path, ext_ref);
1926
1927         if (err & LAST_ITEM)
1928                 goto out;
1929
1930         /* still have inode items in thie leaf */
1931         if (cur->start == cur_bytenr)
1932                 goto again;
1933
1934         /*
1935          * we have switched to another leaf, above nodes may
1936          * have changed, here walk down the path, if a node
1937          * or leaf is shared, check whether we can skip this
1938          * node or leaf.
1939          */
1940         for (i = root_level; i >= 0; i--) {
1941                 if (path->nodes[i]->start == nrefs->bytenr[i])
1942                         continue;
1943
1944                 ret = update_nodes_refs(root,
1945                                 path->nodes[i]->start,
1946                                 nrefs, i);
1947                 if (ret)
1948                         goto out;
1949
1950                 if (!nrefs->need_check[i]) {
1951                         *level += 1;
1952                         break;
1953                 }
1954         }
1955
1956         for (i = 0; i < *level; i++) {
1957                 free_extent_buffer(path->nodes[i]);
1958                 path->nodes[i] = NULL;
1959         }
1960 out:
1961         err &= ~LAST_ITEM;
1962         if (err && !ret)
1963                 ret = err;
1964         return ret;
1965 }
1966
1967 static void reada_walk_down(struct btrfs_root *root,
1968                             struct extent_buffer *node, int slot)
1969 {
1970         struct btrfs_fs_info *fs_info = root->fs_info;
1971         u64 bytenr;
1972         u64 ptr_gen;
1973         u32 nritems;
1974         u32 blocksize;
1975         int i;
1976         int level;
1977
1978         level = btrfs_header_level(node);
1979         if (level != 1)
1980                 return;
1981
1982         nritems = btrfs_header_nritems(node);
1983         blocksize = fs_info->nodesize;
1984         for (i = slot; i < nritems; i++) {
1985                 bytenr = btrfs_node_blockptr(node, i);
1986                 ptr_gen = btrfs_node_ptr_generation(node, i);
1987                 readahead_tree_block(fs_info, bytenr, blocksize, ptr_gen);
1988         }
1989 }
1990
1991 /*
1992  * Check the child node/leaf by the following condition:
1993  * 1. the first item key of the node/leaf should be the same with the one
1994  *    in parent.
1995  * 2. block in parent node should match the child node/leaf.
1996  * 3. generation of parent node and child's header should be consistent.
1997  *
1998  * Or the child node/leaf pointed by the key in parent is not valid.
1999  *
2000  * We hope to check leaf owner too, but since subvol may share leaves,
2001  * which makes leaf owner check not so strong, key check should be
2002  * sufficient enough for that case.
2003  */
2004 static int check_child_node(struct extent_buffer *parent, int slot,
2005                             struct extent_buffer *child)
2006 {
2007         struct btrfs_key parent_key;
2008         struct btrfs_key child_key;
2009         int ret = 0;
2010
2011         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2012         if (btrfs_header_level(child) == 0)
2013                 btrfs_item_key_to_cpu(child, &child_key, 0);
2014         else
2015                 btrfs_node_key_to_cpu(child, &child_key, 0);
2016
2017         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2018                 ret = -EINVAL;
2019                 fprintf(stderr,
2020                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2021                         parent_key.objectid, parent_key.type, parent_key.offset,
2022                         child_key.objectid, child_key.type, child_key.offset);
2023         }
2024         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2025                 ret = -EINVAL;
2026                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2027                         btrfs_node_blockptr(parent, slot),
2028                         btrfs_header_bytenr(child));
2029         }
2030         if (btrfs_node_ptr_generation(parent, slot) !=
2031             btrfs_header_generation(child)) {
2032                 ret = -EINVAL;
2033                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2034                         btrfs_header_generation(child),
2035                         btrfs_node_ptr_generation(parent, slot));
2036         }
2037         return ret;
2038 }
2039
2040 /*
2041  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2042  * in every fs or file tree check. Here we find its all root ids, and only check
2043  * it in the fs or file tree which has the smallest root id.
2044  */
2045 static int need_check(struct btrfs_root *root, struct ulist *roots)
2046 {
2047         struct rb_node *node;
2048         struct ulist_node *u;
2049
2050         if (roots->nnodes == 1)
2051                 return 1;
2052
2053         node = rb_first(&roots->root);
2054         u = rb_entry(node, struct ulist_node, rb_node);
2055         /*
2056          * current root id is not smallest, we skip it and let it be checked
2057          * in the fs or file tree who hash the smallest root id.
2058          */
2059         if (root->objectid != u->val)
2060                 return 0;
2061
2062         return 1;
2063 }
2064
2065 /*
2066  * for a tree node or leaf, we record its reference count, so later if we still
2067  * process this node or leaf, don't need to compute its reference count again.
2068  */
2069 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2070                              struct node_refs *nrefs, u64 level)
2071 {
2072         int check, ret;
2073         u64 refs;
2074         struct ulist *roots;
2075
2076         if (nrefs->bytenr[level] != bytenr) {
2077                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2078                                        level, 1, &refs, NULL);
2079                 if (ret < 0)
2080                         return ret;
2081
2082                 nrefs->bytenr[level] = bytenr;
2083                 nrefs->refs[level] = refs;
2084                 if (refs > 1) {
2085                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2086                                                    0, &roots);
2087                         if (ret)
2088                                 return -EIO;
2089
2090                         check = need_check(root, roots);
2091                         ulist_free(roots);
2092                         nrefs->need_check[level] = check;
2093                 } else {
2094                         nrefs->need_check[level] = 1;
2095                 }
2096         }
2097
2098         return 0;
2099 }
2100
2101 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2102                           struct walk_control *wc, int *level,
2103                           struct node_refs *nrefs)
2104 {
2105         enum btrfs_tree_block_status status;
2106         u64 bytenr;
2107         u64 ptr_gen;
2108         struct btrfs_fs_info *fs_info = root->fs_info;
2109         struct extent_buffer *next;
2110         struct extent_buffer *cur;
2111         u32 blocksize;
2112         int ret, err = 0;
2113         u64 refs;
2114
2115         WARN_ON(*level < 0);
2116         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2117
2118         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2119                 refs = nrefs->refs[*level];
2120                 ret = 0;
2121         } else {
2122                 ret = btrfs_lookup_extent_info(NULL, root,
2123                                        path->nodes[*level]->start,
2124                                        *level, 1, &refs, NULL);
2125                 if (ret < 0) {
2126                         err = ret;
2127                         goto out;
2128                 }
2129                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2130                 nrefs->refs[*level] = refs;
2131         }
2132
2133         if (refs > 1) {
2134                 ret = enter_shared_node(root, path->nodes[*level]->start,
2135                                         refs, wc, *level);
2136                 if (ret > 0) {
2137                         err = ret;
2138                         goto out;
2139                 }
2140         }
2141
2142         while (*level >= 0) {
2143                 WARN_ON(*level < 0);
2144                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2145                 cur = path->nodes[*level];
2146
2147                 if (btrfs_header_level(cur) != *level)
2148                         WARN_ON(1);
2149
2150                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2151                         break;
2152                 if (*level == 0) {
2153                         ret = process_one_leaf(root, cur, wc);
2154                         if (ret < 0)
2155                                 err = ret;
2156                         break;
2157                 }
2158                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2159                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2160                 blocksize = fs_info->nodesize;
2161
2162                 if (bytenr == nrefs->bytenr[*level - 1]) {
2163                         refs = nrefs->refs[*level - 1];
2164                 } else {
2165                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2166                                         *level - 1, 1, &refs, NULL);
2167                         if (ret < 0) {
2168                                 refs = 0;
2169                         } else {
2170                                 nrefs->bytenr[*level - 1] = bytenr;
2171                                 nrefs->refs[*level - 1] = refs;
2172                         }
2173                 }
2174
2175                 if (refs > 1) {
2176                         ret = enter_shared_node(root, bytenr, refs,
2177                                                 wc, *level - 1);
2178                         if (ret > 0) {
2179                                 path->slots[*level]++;
2180                                 continue;
2181                         }
2182                 }
2183
2184                 next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
2185                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2186                         free_extent_buffer(next);
2187                         reada_walk_down(root, cur, path->slots[*level]);
2188                         next = read_tree_block(root->fs_info, bytenr, blocksize,
2189                                                ptr_gen);
2190                         if (!extent_buffer_uptodate(next)) {
2191                                 struct btrfs_key node_key;
2192
2193                                 btrfs_node_key_to_cpu(path->nodes[*level],
2194                                                       &node_key,
2195                                                       path->slots[*level]);
2196                                 btrfs_add_corrupt_extent_record(root->fs_info,
2197                                                 &node_key,
2198                                                 path->nodes[*level]->start,
2199                                                 root->fs_info->nodesize,
2200                                                 *level);
2201                                 err = -EIO;
2202                                 goto out;
2203                         }
2204                 }
2205
2206                 ret = check_child_node(cur, path->slots[*level], next);
2207                 if (ret) {
2208                         free_extent_buffer(next);
2209                         err = ret;
2210                         goto out;
2211                 }
2212
2213                 if (btrfs_is_leaf(next))
2214                         status = btrfs_check_leaf(root, NULL, next);
2215                 else
2216                         status = btrfs_check_node(root, NULL, next);
2217                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2218                         free_extent_buffer(next);
2219                         err = -EIO;
2220                         goto out;
2221                 }
2222
2223                 *level = *level - 1;
2224                 free_extent_buffer(path->nodes[*level]);
2225                 path->nodes[*level] = next;
2226                 path->slots[*level] = 0;
2227         }
2228 out:
2229         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2230         return err;
2231 }
2232
2233 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2234                             unsigned int ext_ref);
2235
2236 /*
2237  * Returns >0  Found error, should continue
2238  * Returns <0  Fatal error, must exit the whole check
2239  * Returns 0   No errors found
2240  */
2241 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2242                              int *level, struct node_refs *nrefs, int ext_ref)
2243 {
2244         enum btrfs_tree_block_status status;
2245         u64 bytenr;
2246         u64 ptr_gen;
2247         struct btrfs_fs_info *fs_info = root->fs_info;
2248         struct extent_buffer *next;
2249         struct extent_buffer *cur;
2250         u32 blocksize;
2251         int ret;
2252
2253         WARN_ON(*level < 0);
2254         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2255
2256         ret = update_nodes_refs(root, path->nodes[*level]->start,
2257                                 nrefs, *level);
2258         if (ret < 0)
2259                 return ret;
2260
2261         while (*level >= 0) {
2262                 WARN_ON(*level < 0);
2263                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2264                 cur = path->nodes[*level];
2265
2266                 if (btrfs_header_level(cur) != *level)
2267                         WARN_ON(1);
2268
2269                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2270                         break;
2271                 /* Don't forgot to check leaf/node validation */
2272                 if (*level == 0) {
2273                         ret = btrfs_check_leaf(root, NULL, cur);
2274                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2275                                 ret = -EIO;
2276                                 break;
2277                         }
2278                         ret = process_one_leaf_v2(root, path, nrefs,
2279                                                   level, ext_ref);
2280                         break;
2281                 } else {
2282                         ret = btrfs_check_node(root, NULL, cur);
2283                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2284                                 ret = -EIO;
2285                                 break;
2286                         }
2287                 }
2288                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2289                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2290                 blocksize = fs_info->nodesize;
2291
2292                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2293                 if (ret)
2294                         break;
2295                 if (!nrefs->need_check[*level - 1]) {
2296                         path->slots[*level]++;
2297                         continue;
2298                 }
2299
2300                 next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
2301                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2302                         free_extent_buffer(next);
2303                         reada_walk_down(root, cur, path->slots[*level]);
2304                         next = read_tree_block(fs_info, bytenr, blocksize,
2305                                                ptr_gen);
2306                         if (!extent_buffer_uptodate(next)) {
2307                                 struct btrfs_key node_key;
2308
2309                                 btrfs_node_key_to_cpu(path->nodes[*level],
2310                                                       &node_key,
2311                                                       path->slots[*level]);
2312                                 btrfs_add_corrupt_extent_record(fs_info,
2313                                                 &node_key,
2314                                                 path->nodes[*level]->start,
2315                                                 fs_info->nodesize,
2316                                                 *level);
2317                                 ret = -EIO;
2318                                 break;
2319                         }
2320                 }
2321
2322                 ret = check_child_node(cur, path->slots[*level], next);
2323                 if (ret < 0) 
2324                         break;
2325
2326                 if (btrfs_is_leaf(next))
2327                         status = btrfs_check_leaf(root, NULL, next);
2328                 else
2329                         status = btrfs_check_node(root, NULL, next);
2330                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2331                         free_extent_buffer(next);
2332                         ret = -EIO;
2333                         break;
2334                 }
2335
2336                 *level = *level - 1;
2337                 free_extent_buffer(path->nodes[*level]);
2338                 path->nodes[*level] = next;
2339                 path->slots[*level] = 0;
2340         }
2341         return ret;
2342 }
2343
2344 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2345                         struct walk_control *wc, int *level)
2346 {
2347         int i;
2348         struct extent_buffer *leaf;
2349
2350         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2351                 leaf = path->nodes[i];
2352                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2353                         path->slots[i]++;
2354                         *level = i;
2355                         return 0;
2356                 } else {
2357                         free_extent_buffer(path->nodes[*level]);
2358                         path->nodes[*level] = NULL;
2359                         BUG_ON(*level > wc->active_node);
2360                         if (*level == wc->active_node)
2361                                 leave_shared_node(root, wc, *level);
2362                         *level = i + 1;
2363                 }
2364         }
2365         return 1;
2366 }
2367
2368 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2369                            int *level)
2370 {
2371         int i;
2372         struct extent_buffer *leaf;
2373
2374         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2375                 leaf = path->nodes[i];
2376                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2377                         path->slots[i]++;
2378                         *level = i;
2379                         return 0;
2380                 } else {
2381                         free_extent_buffer(path->nodes[*level]);
2382                         path->nodes[*level] = NULL;
2383                         *level = i + 1;
2384                 }
2385         }
2386         return 1;
2387 }
2388
2389 static int check_root_dir(struct inode_record *rec)
2390 {
2391         struct inode_backref *backref;
2392         int ret = -1;
2393
2394         if (!rec->found_inode_item || rec->errors)
2395                 goto out;
2396         if (rec->nlink != 1 || rec->found_link != 0)
2397                 goto out;
2398         if (list_empty(&rec->backrefs))
2399                 goto out;
2400         backref = to_inode_backref(rec->backrefs.next);
2401         if (!backref->found_inode_ref)
2402                 goto out;
2403         if (backref->index != 0 || backref->namelen != 2 ||
2404             memcmp(backref->name, "..", 2))
2405                 goto out;
2406         if (backref->found_dir_index || backref->found_dir_item)
2407                 goto out;
2408         ret = 0;
2409 out:
2410         return ret;
2411 }
2412
2413 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2414                               struct btrfs_root *root, struct btrfs_path *path,
2415                               struct inode_record *rec)
2416 {
2417         struct btrfs_inode_item *ei;
2418         struct btrfs_key key;
2419         int ret;
2420
2421         key.objectid = rec->ino;
2422         key.type = BTRFS_INODE_ITEM_KEY;
2423         key.offset = (u64)-1;
2424
2425         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2426         if (ret < 0)
2427                 goto out;
2428         if (ret) {
2429                 if (!path->slots[0]) {
2430                         ret = -ENOENT;
2431                         goto out;
2432                 }
2433                 path->slots[0]--;
2434                 ret = 0;
2435         }
2436         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2437         if (key.objectid != rec->ino) {
2438                 ret = -ENOENT;
2439                 goto out;
2440         }
2441
2442         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2443                             struct btrfs_inode_item);
2444         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2445         btrfs_mark_buffer_dirty(path->nodes[0]);
2446         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2447         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2448                root->root_key.objectid);
2449 out:
2450         btrfs_release_path(path);
2451         return ret;
2452 }
2453
2454 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2455                                     struct btrfs_root *root,
2456                                     struct btrfs_path *path,
2457                                     struct inode_record *rec)
2458 {
2459         int ret;
2460
2461         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2462         btrfs_release_path(path);
2463         if (!ret)
2464                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2465         return ret;
2466 }
2467
2468 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2469                                struct btrfs_root *root,
2470                                struct btrfs_path *path,
2471                                struct inode_record *rec)
2472 {
2473         struct btrfs_inode_item *ei;
2474         struct btrfs_key key;
2475         int ret = 0;
2476
2477         key.objectid = rec->ino;
2478         key.type = BTRFS_INODE_ITEM_KEY;
2479         key.offset = 0;
2480
2481         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2482         if (ret) {
2483                 if (ret > 0)
2484                         ret = -ENOENT;
2485                 goto out;
2486         }
2487
2488         /* Since ret == 0, no need to check anything */
2489         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2490                             struct btrfs_inode_item);
2491         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2492         btrfs_mark_buffer_dirty(path->nodes[0]);
2493         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2494         printf("reset nbytes for ino %llu root %llu\n",
2495                rec->ino, root->root_key.objectid);
2496 out:
2497         btrfs_release_path(path);
2498         return ret;
2499 }
2500
2501 static int add_missing_dir_index(struct btrfs_root *root,
2502                                  struct cache_tree *inode_cache,
2503                                  struct inode_record *rec,
2504                                  struct inode_backref *backref)
2505 {
2506         struct btrfs_path path;
2507         struct btrfs_trans_handle *trans;
2508         struct btrfs_dir_item *dir_item;
2509         struct extent_buffer *leaf;
2510         struct btrfs_key key;
2511         struct btrfs_disk_key disk_key;
2512         struct inode_record *dir_rec;
2513         unsigned long name_ptr;
2514         u32 data_size = sizeof(*dir_item) + backref->namelen;
2515         int ret;
2516
2517         trans = btrfs_start_transaction(root, 1);
2518         if (IS_ERR(trans))
2519                 return PTR_ERR(trans);
2520
2521         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2522                 (unsigned long long)rec->ino);
2523
2524         btrfs_init_path(&path);
2525         key.objectid = backref->dir;
2526         key.type = BTRFS_DIR_INDEX_KEY;
2527         key.offset = backref->index;
2528         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2529         BUG_ON(ret);
2530
2531         leaf = path.nodes[0];
2532         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2533
2534         disk_key.objectid = cpu_to_le64(rec->ino);
2535         disk_key.type = BTRFS_INODE_ITEM_KEY;
2536         disk_key.offset = 0;
2537
2538         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2539         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2540         btrfs_set_dir_data_len(leaf, dir_item, 0);
2541         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2542         name_ptr = (unsigned long)(dir_item + 1);
2543         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2544         btrfs_mark_buffer_dirty(leaf);
2545         btrfs_release_path(&path);
2546         btrfs_commit_transaction(trans, root);
2547
2548         backref->found_dir_index = 1;
2549         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2550         BUG_ON(IS_ERR(dir_rec));
2551         if (!dir_rec)
2552                 return 0;
2553         dir_rec->found_size += backref->namelen;
2554         if (dir_rec->found_size == dir_rec->isize &&
2555             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2556                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2557         if (dir_rec->found_size != dir_rec->isize)
2558                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2559
2560         return 0;
2561 }
2562
2563 static int delete_dir_index(struct btrfs_root *root,
2564                             struct inode_backref *backref)
2565 {
2566         struct btrfs_trans_handle *trans;
2567         struct btrfs_dir_item *di;
2568         struct btrfs_path path;
2569         int ret = 0;
2570
2571         trans = btrfs_start_transaction(root, 1);
2572         if (IS_ERR(trans))
2573                 return PTR_ERR(trans);
2574
2575         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2576                 (unsigned long long)backref->dir,
2577                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2578                 (unsigned long long)root->objectid);
2579
2580         btrfs_init_path(&path);
2581         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2582                                     backref->name, backref->namelen,
2583                                     backref->index, -1);
2584         if (IS_ERR(di)) {
2585                 ret = PTR_ERR(di);
2586                 btrfs_release_path(&path);
2587                 btrfs_commit_transaction(trans, root);
2588                 if (ret == -ENOENT)
2589                         return 0;
2590                 return ret;
2591         }
2592
2593         if (!di)
2594                 ret = btrfs_del_item(trans, root, &path);
2595         else
2596                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2597         BUG_ON(ret);
2598         btrfs_release_path(&path);
2599         btrfs_commit_transaction(trans, root);
2600         return ret;
2601 }
2602
2603 static int create_inode_item(struct btrfs_root *root,
2604                              struct inode_record *rec,
2605                              int root_dir)
2606 {
2607         struct btrfs_trans_handle *trans;
2608         struct btrfs_inode_item inode_item;
2609         time_t now = time(NULL);
2610         int ret;
2611
2612         trans = btrfs_start_transaction(root, 1);
2613         if (IS_ERR(trans)) {
2614                 ret = PTR_ERR(trans);
2615                 return ret;
2616         }
2617
2618         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2619                 "be incomplete, please check permissions and content after "
2620                 "the fsck completes.\n", (unsigned long long)root->objectid,
2621                 (unsigned long long)rec->ino);
2622
2623         memset(&inode_item, 0, sizeof(inode_item));
2624         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2625         if (root_dir)
2626                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2627         else
2628                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2629         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2630         if (rec->found_dir_item) {
2631                 if (rec->found_file_extent)
2632                         fprintf(stderr, "root %llu inode %llu has both a dir "
2633                                 "item and extents, unsure if it is a dir or a "
2634                                 "regular file so setting it as a directory\n",
2635                                 (unsigned long long)root->objectid,
2636                                 (unsigned long long)rec->ino);
2637                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2638                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2639         } else if (!rec->found_dir_item) {
2640                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2641                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2642         }
2643         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2644         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2645         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2646         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2647         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2648         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2649         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2650         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2651
2652         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2653         BUG_ON(ret);
2654         btrfs_commit_transaction(trans, root);
2655         return 0;
2656 }
2657
2658 static int repair_inode_backrefs(struct btrfs_root *root,
2659                                  struct inode_record *rec,
2660                                  struct cache_tree *inode_cache,
2661                                  int delete)
2662 {
2663         struct inode_backref *tmp, *backref;
2664         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2665         int ret = 0;
2666         int repaired = 0;
2667
2668         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2669                 if (!delete && rec->ino == root_dirid) {
2670                         if (!rec->found_inode_item) {
2671                                 ret = create_inode_item(root, rec, 1);
2672                                 if (ret)
2673                                         break;
2674                                 repaired++;
2675                         }
2676                 }
2677
2678                 /* Index 0 for root dir's are special, don't mess with it */
2679                 if (rec->ino == root_dirid && backref->index == 0)
2680                         continue;
2681
2682                 if (delete &&
2683                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2684                      (backref->found_dir_index && backref->found_inode_ref &&
2685                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2686                         ret = delete_dir_index(root, backref);
2687                         if (ret)
2688                                 break;
2689                         repaired++;
2690                         list_del(&backref->list);
2691                         free(backref);
2692                         continue;
2693                 }
2694
2695                 if (!delete && !backref->found_dir_index &&
2696                     backref->found_dir_item && backref->found_inode_ref) {
2697                         ret = add_missing_dir_index(root, inode_cache, rec,
2698                                                     backref);
2699                         if (ret)
2700                                 break;
2701                         repaired++;
2702                         if (backref->found_dir_item &&
2703                             backref->found_dir_index) {
2704                                 if (!backref->errors &&
2705                                     backref->found_inode_ref) {
2706                                         list_del(&backref->list);
2707                                         free(backref);
2708                                         continue;
2709                                 }
2710                         }
2711                 }
2712
2713                 if (!delete && (!backref->found_dir_index &&
2714                                 !backref->found_dir_item &&
2715                                 backref->found_inode_ref)) {
2716                         struct btrfs_trans_handle *trans;
2717                         struct btrfs_key location;
2718
2719                         ret = check_dir_conflict(root, backref->name,
2720                                                  backref->namelen,
2721                                                  backref->dir,
2722                                                  backref->index);
2723                         if (ret) {
2724                                 /*
2725                                  * let nlink fixing routine to handle it,
2726                                  * which can do it better.
2727                                  */
2728                                 ret = 0;
2729                                 break;
2730                         }
2731                         location.objectid = rec->ino;
2732                         location.type = BTRFS_INODE_ITEM_KEY;
2733                         location.offset = 0;
2734
2735                         trans = btrfs_start_transaction(root, 1);
2736                         if (IS_ERR(trans)) {
2737                                 ret = PTR_ERR(trans);
2738                                 break;
2739                         }
2740                         fprintf(stderr, "adding missing dir index/item pair "
2741                                 "for inode %llu\n",
2742                                 (unsigned long long)rec->ino);
2743                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2744                                                     backref->namelen,
2745                                                     backref->dir, &location,
2746                                                     imode_to_type(rec->imode),
2747                                                     backref->index);
2748                         BUG_ON(ret);
2749                         btrfs_commit_transaction(trans, root);
2750                         repaired++;
2751                 }
2752
2753                 if (!delete && (backref->found_inode_ref &&
2754                                 backref->found_dir_index &&
2755                                 backref->found_dir_item &&
2756                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2757                                 !rec->found_inode_item)) {
2758                         ret = create_inode_item(root, rec, 0);
2759                         if (ret)
2760                                 break;
2761                         repaired++;
2762                 }
2763
2764         }
2765         return ret ? ret : repaired;
2766 }
2767
2768 /*
2769  * To determine the file type for nlink/inode_item repair
2770  *
2771  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2772  * Return -ENOENT if file type is not found.
2773  */
2774 static int find_file_type(struct inode_record *rec, u8 *type)
2775 {
2776         struct inode_backref *backref;
2777
2778         /* For inode item recovered case */
2779         if (rec->found_inode_item) {
2780                 *type = imode_to_type(rec->imode);
2781                 return 0;
2782         }
2783
2784         list_for_each_entry(backref, &rec->backrefs, list) {
2785                 if (backref->found_dir_index || backref->found_dir_item) {
2786                         *type = backref->filetype;
2787                         return 0;
2788                 }
2789         }
2790         return -ENOENT;
2791 }
2792
2793 /*
2794  * To determine the file name for nlink repair
2795  *
2796  * Return 0 if file name is found, set name and namelen.
2797  * Return -ENOENT if file name is not found.
2798  */
2799 static int find_file_name(struct inode_record *rec,
2800                           char *name, int *namelen)
2801 {
2802         struct inode_backref *backref;
2803
2804         list_for_each_entry(backref, &rec->backrefs, list) {
2805                 if (backref->found_dir_index || backref->found_dir_item ||
2806                     backref->found_inode_ref) {
2807                         memcpy(name, backref->name, backref->namelen);
2808                         *namelen = backref->namelen;
2809                         return 0;
2810                 }
2811         }
2812         return -ENOENT;
2813 }
2814
2815 /* Reset the nlink of the inode to the correct one */
2816 static int reset_nlink(struct btrfs_trans_handle *trans,
2817                        struct btrfs_root *root,
2818                        struct btrfs_path *path,
2819                        struct inode_record *rec)
2820 {
2821         struct inode_backref *backref;
2822         struct inode_backref *tmp;
2823         struct btrfs_key key;
2824         struct btrfs_inode_item *inode_item;
2825         int ret = 0;
2826
2827         /* We don't believe this either, reset it and iterate backref */
2828         rec->found_link = 0;
2829
2830         /* Remove all backref including the valid ones */
2831         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2832                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2833                                    backref->index, backref->name,
2834                                    backref->namelen, 0);
2835                 if (ret < 0)
2836                         goto out;
2837
2838                 /* remove invalid backref, so it won't be added back */
2839                 if (!(backref->found_dir_index &&
2840                       backref->found_dir_item &&
2841                       backref->found_inode_ref)) {
2842                         list_del(&backref->list);
2843                         free(backref);
2844                 } else {
2845                         rec->found_link++;
2846                 }
2847         }
2848
2849         /* Set nlink to 0 */
2850         key.objectid = rec->ino;
2851         key.type = BTRFS_INODE_ITEM_KEY;
2852         key.offset = 0;
2853         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2854         if (ret < 0)
2855                 goto out;
2856         if (ret > 0) {
2857                 ret = -ENOENT;
2858                 goto out;
2859         }
2860         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2861                                     struct btrfs_inode_item);
2862         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2863         btrfs_mark_buffer_dirty(path->nodes[0]);
2864         btrfs_release_path(path);
2865
2866         /*
2867          * Add back valid inode_ref/dir_item/dir_index,
2868          * add_link() will handle the nlink inc, so new nlink must be correct
2869          */
2870         list_for_each_entry(backref, &rec->backrefs, list) {
2871                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2872                                      backref->name, backref->namelen,
2873                                      backref->filetype, &backref->index, 1);
2874                 if (ret < 0)
2875                         goto out;
2876         }
2877 out:
2878         btrfs_release_path(path);
2879         return ret;
2880 }
2881
2882 static int get_highest_inode(struct btrfs_trans_handle *trans,
2883                                 struct btrfs_root *root,
2884                                 struct btrfs_path *path,
2885                                 u64 *highest_ino)
2886 {
2887         struct btrfs_key key, found_key;
2888         int ret;
2889
2890         btrfs_init_path(path);
2891         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2892         key.offset = -1;
2893         key.type = BTRFS_INODE_ITEM_KEY;
2894         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2895         if (ret == 1) {
2896                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2897                                 path->slots[0] - 1);
2898                 *highest_ino = found_key.objectid;
2899                 ret = 0;
2900         }
2901         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2902                 ret = -EOVERFLOW;
2903         btrfs_release_path(path);
2904         return ret;
2905 }
2906
2907 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2908                                struct btrfs_root *root,
2909                                struct btrfs_path *path,
2910                                struct inode_record *rec)
2911 {
2912         char *dir_name = "lost+found";
2913         char namebuf[BTRFS_NAME_LEN] = {0};
2914         u64 lost_found_ino;
2915         u32 mode = 0700;
2916         u8 type = 0;
2917         int namelen = 0;
2918         int name_recovered = 0;
2919         int type_recovered = 0;
2920         int ret = 0;
2921
2922         /*
2923          * Get file name and type first before these invalid inode ref
2924          * are deleted by remove_all_invalid_backref()
2925          */
2926         name_recovered = !find_file_name(rec, namebuf, &namelen);
2927         type_recovered = !find_file_type(rec, &type);
2928
2929         if (!name_recovered) {
2930                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2931                        rec->ino, rec->ino);
2932                 namelen = count_digits(rec->ino);
2933                 sprintf(namebuf, "%llu", rec->ino);
2934                 name_recovered = 1;
2935         }
2936         if (!type_recovered) {
2937                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2938                        rec->ino);
2939                 type = BTRFS_FT_REG_FILE;
2940                 type_recovered = 1;
2941         }
2942
2943         ret = reset_nlink(trans, root, path, rec);
2944         if (ret < 0) {
2945                 fprintf(stderr,
2946                         "Failed to reset nlink for inode %llu: %s\n",
2947                         rec->ino, strerror(-ret));
2948                 goto out;
2949         }
2950
2951         if (rec->found_link == 0) {
2952                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2953                 if (ret < 0)
2954                         goto out;
2955                 lost_found_ino++;
2956                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2957                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2958                                   mode);
2959                 if (ret < 0) {
2960                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2961                                 dir_name, strerror(-ret));
2962                         goto out;
2963                 }
2964                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2965                                      namebuf, namelen, type, NULL, 1);
2966                 /*
2967                  * Add ".INO" suffix several times to handle case where
2968                  * "FILENAME.INO" is already taken by another file.
2969                  */
2970                 while (ret == -EEXIST) {
2971                         /*
2972                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2973                          */
2974                         if (namelen + count_digits(rec->ino) + 1 >
2975                             BTRFS_NAME_LEN) {
2976                                 ret = -EFBIG;
2977                                 goto out;
2978                         }
2979                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2980                                  ".%llu", rec->ino);
2981                         namelen += count_digits(rec->ino) + 1;
2982                         ret = btrfs_add_link(trans, root, rec->ino,
2983                                              lost_found_ino, namebuf,
2984                                              namelen, type, NULL, 1);
2985                 }
2986                 if (ret < 0) {
2987                         fprintf(stderr,
2988                                 "Failed to link the inode %llu to %s dir: %s\n",
2989                                 rec->ino, dir_name, strerror(-ret));
2990                         goto out;
2991                 }
2992                 /*
2993                  * Just increase the found_link, don't actually add the
2994                  * backref. This will make things easier and this inode
2995                  * record will be freed after the repair is done.
2996                  * So fsck will not report problem about this inode.
2997                  */
2998                 rec->found_link++;
2999                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3000                        namelen, namebuf, dir_name);
3001         }
3002         printf("Fixed the nlink of inode %llu\n", rec->ino);
3003 out:
3004         /*
3005          * Clear the flag anyway, or we will loop forever for the same inode
3006          * as it will not be removed from the bad inode list and the dead loop
3007          * happens.
3008          */
3009         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3010         btrfs_release_path(path);
3011         return ret;
3012 }
3013
3014 /*
3015  * Check if there is any normal(reg or prealloc) file extent for given
3016  * ino.
3017  * This is used to determine the file type when neither its dir_index/item or
3018  * inode_item exists.
3019  *
3020  * This will *NOT* report error, if any error happens, just consider it does
3021  * not have any normal file extent.
3022  */
3023 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3024 {
3025         struct btrfs_path path;
3026         struct btrfs_key key;
3027         struct btrfs_key found_key;
3028         struct btrfs_file_extent_item *fi;
3029         u8 type;
3030         int ret = 0;
3031
3032         btrfs_init_path(&path);
3033         key.objectid = ino;
3034         key.type = BTRFS_EXTENT_DATA_KEY;
3035         key.offset = 0;
3036
3037         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3038         if (ret < 0) {
3039                 ret = 0;
3040                 goto out;
3041         }
3042         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3043                 ret = btrfs_next_leaf(root, &path);
3044                 if (ret) {
3045                         ret = 0;
3046                         goto out;
3047                 }
3048         }
3049         while (1) {
3050                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3051                                       path.slots[0]);
3052                 if (found_key.objectid != ino ||
3053                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3054                         break;
3055                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3056                                     struct btrfs_file_extent_item);
3057                 type = btrfs_file_extent_type(path.nodes[0], fi);
3058                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3059                         ret = 1;
3060                         goto out;
3061                 }
3062         }
3063 out:
3064         btrfs_release_path(&path);
3065         return ret;
3066 }
3067
3068 static u32 btrfs_type_to_imode(u8 type)
3069 {
3070         static u32 imode_by_btrfs_type[] = {
3071                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3072                 [BTRFS_FT_DIR]          = S_IFDIR,
3073                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3074                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3075                 [BTRFS_FT_FIFO]         = S_IFIFO,
3076                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3077                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3078         };
3079
3080         return imode_by_btrfs_type[(type)];
3081 }
3082
3083 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3084                                 struct btrfs_root *root,
3085                                 struct btrfs_path *path,
3086                                 struct inode_record *rec)
3087 {
3088         u8 filetype;
3089         u32 mode = 0700;
3090         int type_recovered = 0;
3091         int ret = 0;
3092
3093         printf("Trying to rebuild inode:%llu\n", rec->ino);
3094
3095         type_recovered = !find_file_type(rec, &filetype);
3096
3097         /*
3098          * Try to determine inode type if type not found.
3099          *
3100          * For found regular file extent, it must be FILE.
3101          * For found dir_item/index, it must be DIR.
3102          *
3103          * For undetermined one, use FILE as fallback.
3104          *
3105          * TODO:
3106          * 1. If found backref(inode_index/item is already handled) to it,
3107          *    it must be DIR.
3108          *    Need new inode-inode ref structure to allow search for that.
3109          */
3110         if (!type_recovered) {
3111                 if (rec->found_file_extent &&
3112                     find_normal_file_extent(root, rec->ino)) {
3113                         type_recovered = 1;
3114                         filetype = BTRFS_FT_REG_FILE;
3115                 } else if (rec->found_dir_item) {
3116                         type_recovered = 1;
3117                         filetype = BTRFS_FT_DIR;
3118                 } else if (!list_empty(&rec->orphan_extents)) {
3119                         type_recovered = 1;
3120                         filetype = BTRFS_FT_REG_FILE;
3121                 } else{
3122                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3123                                rec->ino);
3124                         type_recovered = 1;
3125                         filetype = BTRFS_FT_REG_FILE;
3126                 }
3127         }
3128
3129         ret = btrfs_new_inode(trans, root, rec->ino,
3130                               mode | btrfs_type_to_imode(filetype));
3131         if (ret < 0)
3132                 goto out;
3133
3134         /*
3135          * Here inode rebuild is done, we only rebuild the inode item,
3136          * don't repair the nlink(like move to lost+found).
3137          * That is the job of nlink repair.
3138          *
3139          * We just fill the record and return
3140          */
3141         rec->found_dir_item = 1;
3142         rec->imode = mode | btrfs_type_to_imode(filetype);
3143         rec->nlink = 0;
3144         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3145         /* Ensure the inode_nlinks repair function will be called */
3146         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3147 out:
3148         return ret;
3149 }
3150
3151 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3152                                       struct btrfs_root *root,
3153                                       struct btrfs_path *path,
3154                                       struct inode_record *rec)
3155 {
3156         struct orphan_data_extent *orphan;
3157         struct orphan_data_extent *tmp;
3158         int ret = 0;
3159
3160         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3161                 /*
3162                  * Check for conflicting file extents
3163                  *
3164                  * Here we don't know whether the extents is compressed or not,
3165                  * so we can only assume it not compressed nor data offset,
3166                  * and use its disk_len as extent length.
3167                  */
3168                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3169                                        orphan->offset, orphan->disk_len, 0);
3170                 btrfs_release_path(path);
3171                 if (ret < 0)
3172                         goto out;
3173                 if (!ret) {
3174                         fprintf(stderr,
3175                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3176                                 orphan->disk_bytenr, orphan->disk_len);
3177                         ret = btrfs_free_extent(trans,
3178                                         root->fs_info->extent_root,
3179                                         orphan->disk_bytenr, orphan->disk_len,
3180                                         0, root->objectid, orphan->objectid,
3181                                         orphan->offset);
3182                         if (ret < 0)
3183                                 goto out;
3184                 }
3185                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3186                                 orphan->offset, orphan->disk_bytenr,
3187                                 orphan->disk_len, orphan->disk_len);
3188                 if (ret < 0)
3189                         goto out;
3190
3191                 /* Update file size info */
3192                 rec->found_size += orphan->disk_len;
3193                 if (rec->found_size == rec->nbytes)
3194                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3195
3196                 /* Update the file extent hole info too */
3197                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3198                                            orphan->disk_len);
3199                 if (ret < 0)
3200                         goto out;
3201                 if (RB_EMPTY_ROOT(&rec->holes))
3202                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3203
3204                 list_del(&orphan->list);
3205                 free(orphan);
3206         }
3207         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3208 out:
3209         return ret;
3210 }
3211
3212 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3213                                         struct btrfs_root *root,
3214                                         struct btrfs_path *path,
3215                                         struct inode_record *rec)
3216 {
3217         struct rb_node *node;
3218         struct file_extent_hole *hole;
3219         int found = 0;
3220         int ret = 0;
3221
3222         node = rb_first(&rec->holes);
3223
3224         while (node) {
3225                 found = 1;
3226                 hole = rb_entry(node, struct file_extent_hole, node);
3227                 ret = btrfs_punch_hole(trans, root, rec->ino,
3228                                        hole->start, hole->len);
3229                 if (ret < 0)
3230                         goto out;
3231                 ret = del_file_extent_hole(&rec->holes, hole->start,
3232                                            hole->len);
3233                 if (ret < 0)
3234                         goto out;
3235                 if (RB_EMPTY_ROOT(&rec->holes))
3236                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3237                 node = rb_first(&rec->holes);
3238         }
3239         /* special case for a file losing all its file extent */
3240         if (!found) {
3241                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3242                                        round_up(rec->isize,
3243                                                 root->fs_info->sectorsize));
3244                 if (ret < 0)
3245                         goto out;
3246         }
3247         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3248                rec->ino, root->objectid);
3249 out:
3250         return ret;
3251 }
3252
3253 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3254 {
3255         struct btrfs_trans_handle *trans;
3256         struct btrfs_path path;
3257         int ret = 0;
3258
3259         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3260                              I_ERR_NO_ORPHAN_ITEM |
3261                              I_ERR_LINK_COUNT_WRONG |
3262                              I_ERR_NO_INODE_ITEM |
3263                              I_ERR_FILE_EXTENT_ORPHAN |
3264                              I_ERR_FILE_EXTENT_DISCOUNT|
3265                              I_ERR_FILE_NBYTES_WRONG)))
3266                 return rec->errors;
3267
3268         /*
3269          * For nlink repair, it may create a dir and add link, so
3270          * 2 for parent(256)'s dir_index and dir_item
3271          * 2 for lost+found dir's inode_item and inode_ref
3272          * 1 for the new inode_ref of the file
3273          * 2 for lost+found dir's dir_index and dir_item for the file
3274          */
3275         trans = btrfs_start_transaction(root, 7);
3276         if (IS_ERR(trans))
3277                 return PTR_ERR(trans);
3278
3279         btrfs_init_path(&path);
3280         if (rec->errors & I_ERR_NO_INODE_ITEM)
3281                 ret = repair_inode_no_item(trans, root, &path, rec);
3282         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3283                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3284         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3285                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3286         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3287                 ret = repair_inode_isize(trans, root, &path, rec);
3288         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3289                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3290         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3291                 ret = repair_inode_nlinks(trans, root, &path, rec);
3292         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3293                 ret = repair_inode_nbytes(trans, root, &path, rec);
3294         btrfs_commit_transaction(trans, root);
3295         btrfs_release_path(&path);
3296         return ret;
3297 }
3298
3299 static int check_inode_recs(struct btrfs_root *root,
3300                             struct cache_tree *inode_cache)
3301 {
3302         struct cache_extent *cache;
3303         struct ptr_node *node;
3304         struct inode_record *rec;
3305         struct inode_backref *backref;
3306         int stage = 0;
3307         int ret = 0;
3308         int err = 0;
3309         u64 error = 0;
3310         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3311
3312         if (btrfs_root_refs(&root->root_item) == 0) {
3313                 if (!cache_tree_empty(inode_cache))
3314                         fprintf(stderr, "warning line %d\n", __LINE__);
3315                 return 0;
3316         }
3317
3318         /*
3319          * We need to repair backrefs first because we could change some of the
3320          * errors in the inode recs.
3321          *
3322          * We also need to go through and delete invalid backrefs first and then
3323          * add the correct ones second.  We do this because we may get EEXIST
3324          * when adding back the correct index because we hadn't yet deleted the
3325          * invalid index.
3326          *
3327          * For example, if we were missing a dir index then the directories
3328          * isize would be wrong, so if we fixed the isize to what we thought it
3329          * would be and then fixed the backref we'd still have a invalid fs, so
3330          * we need to add back the dir index and then check to see if the isize
3331          * is still wrong.
3332          */
3333         while (stage < 3) {
3334                 stage++;
3335                 if (stage == 3 && !err)
3336                         break;
3337
3338                 cache = search_cache_extent(inode_cache, 0);
3339                 while (repair && cache) {
3340                         node = container_of(cache, struct ptr_node, cache);
3341                         rec = node->data;
3342                         cache = next_cache_extent(cache);
3343
3344                         /* Need to free everything up and rescan */
3345                         if (stage == 3) {
3346                                 remove_cache_extent(inode_cache, &node->cache);
3347                                 free(node);
3348                                 free_inode_rec(rec);
3349                                 continue;
3350                         }
3351
3352                         if (list_empty(&rec->backrefs))
3353                                 continue;
3354
3355                         ret = repair_inode_backrefs(root, rec, inode_cache,
3356                                                     stage == 1);
3357                         if (ret < 0) {
3358                                 err = ret;
3359                                 stage = 2;
3360                                 break;
3361                         } if (ret > 0) {
3362                                 err = -EAGAIN;
3363                         }
3364                 }
3365         }
3366         if (err)
3367                 return err;
3368
3369         rec = get_inode_rec(inode_cache, root_dirid, 0);
3370         BUG_ON(IS_ERR(rec));
3371         if (rec) {
3372                 ret = check_root_dir(rec);
3373                 if (ret) {
3374                         fprintf(stderr, "root %llu root dir %llu error\n",
3375                                 (unsigned long long)root->root_key.objectid,
3376                                 (unsigned long long)root_dirid);
3377                         print_inode_error(root, rec);
3378                         error++;
3379                 }
3380         } else {
3381                 if (repair) {
3382                         struct btrfs_trans_handle *trans;
3383
3384                         trans = btrfs_start_transaction(root, 1);
3385                         if (IS_ERR(trans)) {
3386                                 err = PTR_ERR(trans);
3387                                 return err;
3388                         }
3389
3390                         fprintf(stderr,
3391                                 "root %llu missing its root dir, recreating\n",
3392                                 (unsigned long long)root->objectid);
3393
3394                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3395                         BUG_ON(ret);
3396
3397                         btrfs_commit_transaction(trans, root);
3398                         return -EAGAIN;
3399                 }
3400
3401                 fprintf(stderr, "root %llu root dir %llu not found\n",
3402                         (unsigned long long)root->root_key.objectid,
3403                         (unsigned long long)root_dirid);
3404         }
3405
3406         while (1) {
3407                 cache = search_cache_extent(inode_cache, 0);
3408                 if (!cache)
3409                         break;
3410                 node = container_of(cache, struct ptr_node, cache);
3411                 rec = node->data;
3412                 remove_cache_extent(inode_cache, &node->cache);
3413                 free(node);
3414                 if (rec->ino == root_dirid ||
3415                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3416                         free_inode_rec(rec);
3417                         continue;
3418                 }
3419
3420                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3421                         ret = check_orphan_item(root, rec->ino);
3422                         if (ret == 0)
3423                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3424                         if (can_free_inode_rec(rec)) {
3425                                 free_inode_rec(rec);
3426                                 continue;
3427                         }
3428                 }
3429
3430                 if (!rec->found_inode_item)
3431                         rec->errors |= I_ERR_NO_INODE_ITEM;
3432                 if (rec->found_link != rec->nlink)
3433                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3434                 if (repair) {
3435                         ret = try_repair_inode(root, rec);
3436                         if (ret == 0 && can_free_inode_rec(rec)) {
3437                                 free_inode_rec(rec);
3438                                 continue;
3439                         }
3440                         ret = 0;
3441                 }
3442
3443                 if (!(repair && ret == 0))
3444                         error++;
3445                 print_inode_error(root, rec);
3446                 list_for_each_entry(backref, &rec->backrefs, list) {
3447                         if (!backref->found_dir_item)
3448                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3449                         if (!backref->found_dir_index)
3450                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3451                         if (!backref->found_inode_ref)
3452                                 backref->errors |= REF_ERR_NO_INODE_REF;
3453                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3454                                 " namelen %u name %s filetype %d errors %x",
3455                                 (unsigned long long)backref->dir,
3456                                 (unsigned long long)backref->index,
3457                                 backref->namelen, backref->name,
3458                                 backref->filetype, backref->errors);
3459                         print_ref_error(backref->errors);
3460                 }
3461                 free_inode_rec(rec);
3462         }
3463         return (error > 0) ? -1 : 0;
3464 }
3465
3466 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3467                                         u64 objectid)
3468 {
3469         struct cache_extent *cache;
3470         struct root_record *rec = NULL;
3471         int ret;
3472
3473         cache = lookup_cache_extent(root_cache, objectid, 1);
3474         if (cache) {
3475                 rec = container_of(cache, struct root_record, cache);
3476         } else {
3477                 rec = calloc(1, sizeof(*rec));
3478                 if (!rec)
3479                         return ERR_PTR(-ENOMEM);
3480                 rec->objectid = objectid;
3481                 INIT_LIST_HEAD(&rec->backrefs);
3482                 rec->cache.start = objectid;
3483                 rec->cache.size = 1;
3484
3485                 ret = insert_cache_extent(root_cache, &rec->cache);
3486                 if (ret)
3487                         return ERR_PTR(-EEXIST);
3488         }
3489         return rec;
3490 }
3491
3492 static struct root_backref *get_root_backref(struct root_record *rec,
3493                                              u64 ref_root, u64 dir, u64 index,
3494                                              const char *name, int namelen)
3495 {
3496         struct root_backref *backref;
3497
3498         list_for_each_entry(backref, &rec->backrefs, list) {
3499                 if (backref->ref_root != ref_root || backref->dir != dir ||
3500                     backref->namelen != namelen)
3501                         continue;
3502                 if (memcmp(name, backref->name, namelen))
3503                         continue;
3504                 return backref;
3505         }
3506
3507         backref = calloc(1, sizeof(*backref) + namelen + 1);
3508         if (!backref)
3509                 return NULL;
3510         backref->ref_root = ref_root;
3511         backref->dir = dir;
3512         backref->index = index;
3513         backref->namelen = namelen;
3514         memcpy(backref->name, name, namelen);
3515         backref->name[namelen] = '\0';
3516         list_add_tail(&backref->list, &rec->backrefs);
3517         return backref;
3518 }
3519
3520 static void free_root_record(struct cache_extent *cache)
3521 {
3522         struct root_record *rec;
3523         struct root_backref *backref;
3524
3525         rec = container_of(cache, struct root_record, cache);
3526         while (!list_empty(&rec->backrefs)) {
3527                 backref = to_root_backref(rec->backrefs.next);
3528                 list_del(&backref->list);
3529                 free(backref);
3530         }
3531
3532         free(rec);
3533 }
3534
3535 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3536
3537 static int add_root_backref(struct cache_tree *root_cache,
3538                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3539                             const char *name, int namelen,
3540                             int item_type, int errors)
3541 {
3542         struct root_record *rec;
3543         struct root_backref *backref;
3544
3545         rec = get_root_rec(root_cache, root_id);
3546         BUG_ON(IS_ERR(rec));
3547         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3548         BUG_ON(!backref);
3549
3550         backref->errors |= errors;
3551
3552         if (item_type != BTRFS_DIR_ITEM_KEY) {
3553                 if (backref->found_dir_index || backref->found_back_ref ||
3554                     backref->found_forward_ref) {
3555                         if (backref->index != index)
3556                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3557                 } else {
3558                         backref->index = index;
3559                 }
3560         }
3561
3562         if (item_type == BTRFS_DIR_ITEM_KEY) {
3563                 if (backref->found_forward_ref)
3564                         rec->found_ref++;
3565                 backref->found_dir_item = 1;
3566         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3567                 backref->found_dir_index = 1;
3568         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3569                 if (backref->found_forward_ref)
3570                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3571                 else if (backref->found_dir_item)
3572                         rec->found_ref++;
3573                 backref->found_forward_ref = 1;
3574         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3575                 if (backref->found_back_ref)
3576                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3577                 backref->found_back_ref = 1;
3578         } else {
3579                 BUG_ON(1);
3580         }
3581
3582         if (backref->found_forward_ref && backref->found_dir_item)
3583                 backref->reachable = 1;
3584         return 0;
3585 }
3586
3587 static int merge_root_recs(struct btrfs_root *root,
3588                            struct cache_tree *src_cache,
3589                            struct cache_tree *dst_cache)
3590 {
3591         struct cache_extent *cache;
3592         struct ptr_node *node;
3593         struct inode_record *rec;
3594         struct inode_backref *backref;
3595         int ret = 0;
3596
3597         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3598                 free_inode_recs_tree(src_cache);
3599                 return 0;
3600         }
3601
3602         while (1) {
3603                 cache = search_cache_extent(src_cache, 0);
3604                 if (!cache)
3605                         break;
3606                 node = container_of(cache, struct ptr_node, cache);
3607                 rec = node->data;
3608                 remove_cache_extent(src_cache, &node->cache);
3609                 free(node);
3610
3611                 ret = is_child_root(root, root->objectid, rec->ino);
3612                 if (ret < 0)
3613                         break;
3614                 else if (ret == 0)
3615                         goto skip;
3616
3617                 list_for_each_entry(backref, &rec->backrefs, list) {
3618                         BUG_ON(backref->found_inode_ref);
3619                         if (backref->found_dir_item)
3620                                 add_root_backref(dst_cache, rec->ino,
3621                                         root->root_key.objectid, backref->dir,
3622                                         backref->index, backref->name,
3623                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3624                                         backref->errors);
3625                         if (backref->found_dir_index)
3626                                 add_root_backref(dst_cache, rec->ino,
3627                                         root->root_key.objectid, backref->dir,
3628                                         backref->index, backref->name,
3629                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3630                                         backref->errors);
3631                 }
3632 skip:
3633                 free_inode_rec(rec);
3634         }
3635         if (ret < 0)
3636                 return ret;
3637         return 0;
3638 }
3639
3640 static int check_root_refs(struct btrfs_root *root,
3641                            struct cache_tree *root_cache)
3642 {
3643         struct root_record *rec;
3644         struct root_record *ref_root;
3645         struct root_backref *backref;
3646         struct cache_extent *cache;
3647         int loop = 1;
3648         int ret;
3649         int error;
3650         int errors = 0;
3651
3652         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3653         BUG_ON(IS_ERR(rec));
3654         rec->found_ref = 1;
3655
3656         /* fixme: this can not detect circular references */
3657         while (loop) {
3658                 loop = 0;
3659                 cache = search_cache_extent(root_cache, 0);
3660                 while (1) {
3661                         if (!cache)
3662                                 break;
3663                         rec = container_of(cache, struct root_record, cache);
3664                         cache = next_cache_extent(cache);
3665
3666                         if (rec->found_ref == 0)
3667                                 continue;
3668
3669                         list_for_each_entry(backref, &rec->backrefs, list) {
3670                                 if (!backref->reachable)
3671                                         continue;
3672
3673                                 ref_root = get_root_rec(root_cache,
3674                                                         backref->ref_root);
3675                                 BUG_ON(IS_ERR(ref_root));
3676                                 if (ref_root->found_ref > 0)
3677                                         continue;
3678
3679                                 backref->reachable = 0;
3680                                 rec->found_ref--;
3681                                 if (rec->found_ref == 0)
3682                                         loop = 1;
3683                         }
3684                 }
3685         }
3686
3687         cache = search_cache_extent(root_cache, 0);
3688         while (1) {
3689                 if (!cache)
3690                         break;
3691                 rec = container_of(cache, struct root_record, cache);
3692                 cache = next_cache_extent(cache);
3693
3694                 if (rec->found_ref == 0 &&
3695                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3696                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3697                         ret = check_orphan_item(root->fs_info->tree_root,
3698                                                 rec->objectid);
3699                         if (ret == 0)
3700                                 continue;
3701
3702                         /*
3703                          * If we don't have a root item then we likely just have
3704                          * a dir item in a snapshot for this root but no actual
3705                          * ref key or anything so it's meaningless.
3706                          */
3707                         if (!rec->found_root_item)
3708                                 continue;
3709                         errors++;
3710                         fprintf(stderr, "fs tree %llu not referenced\n",
3711                                 (unsigned long long)rec->objectid);
3712                 }
3713
3714                 error = 0;
3715                 if (rec->found_ref > 0 && !rec->found_root_item)
3716                         error = 1;
3717                 list_for_each_entry(backref, &rec->backrefs, list) {
3718                         if (!backref->found_dir_item)
3719                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3720                         if (!backref->found_dir_index)
3721                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3722                         if (!backref->found_back_ref)
3723                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3724                         if (!backref->found_forward_ref)
3725                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3726                         if (backref->reachable && backref->errors)
3727                                 error = 1;
3728                 }
3729                 if (!error)
3730                         continue;
3731
3732                 errors++;
3733                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3734                         (unsigned long long)rec->objectid, rec->found_ref,
3735                          rec->found_root_item ? "" : "not found");
3736
3737                 list_for_each_entry(backref, &rec->backrefs, list) {
3738                         if (!backref->reachable)
3739                                 continue;
3740                         if (!backref->errors && rec->found_root_item)
3741                                 continue;
3742                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3743                                 " index %llu namelen %u name %s errors %x\n",
3744                                 (unsigned long long)backref->ref_root,
3745                                 (unsigned long long)backref->dir,
3746                                 (unsigned long long)backref->index,
3747                                 backref->namelen, backref->name,
3748                                 backref->errors);
3749                         print_ref_error(backref->errors);
3750                 }
3751         }
3752         return errors > 0 ? 1 : 0;
3753 }
3754
3755 static int process_root_ref(struct extent_buffer *eb, int slot,
3756                             struct btrfs_key *key,
3757                             struct cache_tree *root_cache)
3758 {
3759         u64 dirid;
3760         u64 index;
3761         u32 len;
3762         u32 name_len;
3763         struct btrfs_root_ref *ref;
3764         char namebuf[BTRFS_NAME_LEN];
3765         int error;
3766
3767         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3768
3769         dirid = btrfs_root_ref_dirid(eb, ref);
3770         index = btrfs_root_ref_sequence(eb, ref);
3771         name_len = btrfs_root_ref_name_len(eb, ref);
3772
3773         if (name_len <= BTRFS_NAME_LEN) {
3774                 len = name_len;
3775                 error = 0;
3776         } else {
3777                 len = BTRFS_NAME_LEN;
3778                 error = REF_ERR_NAME_TOO_LONG;
3779         }
3780         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3781
3782         if (key->type == BTRFS_ROOT_REF_KEY) {
3783                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3784                                  index, namebuf, len, key->type, error);
3785         } else {
3786                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3787                                  index, namebuf, len, key->type, error);
3788         }
3789         return 0;
3790 }
3791
3792 static void free_corrupt_block(struct cache_extent *cache)
3793 {
3794         struct btrfs_corrupt_block *corrupt;
3795
3796         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3797         free(corrupt);
3798 }
3799
3800 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3801
3802 /*
3803  * Repair the btree of the given root.
3804  *
3805  * The fix is to remove the node key in corrupt_blocks cache_tree.
3806  * and rebalance the tree.
3807  * After the fix, the btree should be writeable.
3808  */
3809 static int repair_btree(struct btrfs_root *root,
3810                         struct cache_tree *corrupt_blocks)
3811 {
3812         struct btrfs_trans_handle *trans;
3813         struct btrfs_path path;
3814         struct btrfs_corrupt_block *corrupt;
3815         struct cache_extent *cache;
3816         struct btrfs_key key;
3817         u64 offset;
3818         int level;
3819         int ret = 0;
3820
3821         if (cache_tree_empty(corrupt_blocks))
3822                 return 0;
3823
3824         trans = btrfs_start_transaction(root, 1);
3825         if (IS_ERR(trans)) {
3826                 ret = PTR_ERR(trans);
3827                 fprintf(stderr, "Error starting transaction: %s\n",
3828                         strerror(-ret));
3829                 return ret;
3830         }
3831         btrfs_init_path(&path);
3832         cache = first_cache_extent(corrupt_blocks);
3833         while (cache) {
3834                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3835                                        cache);
3836                 level = corrupt->level;
3837                 path.lowest_level = level;
3838                 key.objectid = corrupt->key.objectid;
3839                 key.type = corrupt->key.type;
3840                 key.offset = corrupt->key.offset;
3841
3842                 /*
3843                  * Here we don't want to do any tree balance, since it may
3844                  * cause a balance with corrupted brother leaf/node,
3845                  * so ins_len set to 0 here.
3846                  * Balance will be done after all corrupt node/leaf is deleted.
3847                  */
3848                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3849                 if (ret < 0)
3850                         goto out;
3851                 offset = btrfs_node_blockptr(path.nodes[level],
3852                                              path.slots[level]);
3853
3854                 /* Remove the ptr */
3855                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3856                 if (ret < 0)
3857                         goto out;
3858                 /*
3859                  * Remove the corresponding extent
3860                  * return value is not concerned.
3861                  */
3862                 btrfs_release_path(&path);
3863                 ret = btrfs_free_extent(trans, root, offset,
3864                                 root->fs_info->nodesize, 0,
3865                                 root->root_key.objectid, level - 1, 0);
3866                 cache = next_cache_extent(cache);
3867         }
3868
3869         /* Balance the btree using btrfs_search_slot() */
3870         cache = first_cache_extent(corrupt_blocks);
3871         while (cache) {
3872                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3873                                        cache);
3874                 memcpy(&key, &corrupt->key, sizeof(key));
3875                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3876                 if (ret < 0)
3877                         goto out;
3878                 /* return will always >0 since it won't find the item */
3879                 ret = 0;
3880                 btrfs_release_path(&path);
3881                 cache = next_cache_extent(cache);
3882         }
3883 out:
3884         btrfs_commit_transaction(trans, root);
3885         btrfs_release_path(&path);
3886         return ret;
3887 }
3888
3889 static int check_fs_root(struct btrfs_root *root,
3890                          struct cache_tree *root_cache,
3891                          struct walk_control *wc)
3892 {
3893         int ret = 0;
3894         int err = 0;
3895         int wret;
3896         int level;
3897         struct btrfs_path path;
3898         struct shared_node root_node;
3899         struct root_record *rec;
3900         struct btrfs_root_item *root_item = &root->root_item;
3901         struct cache_tree corrupt_blocks;
3902         struct orphan_data_extent *orphan;
3903         struct orphan_data_extent *tmp;
3904         enum btrfs_tree_block_status status;
3905         struct node_refs nrefs;
3906
3907         /*
3908          * Reuse the corrupt_block cache tree to record corrupted tree block
3909          *
3910          * Unlike the usage in extent tree check, here we do it in a per
3911          * fs/subvol tree base.
3912          */
3913         cache_tree_init(&corrupt_blocks);
3914         root->fs_info->corrupt_blocks = &corrupt_blocks;
3915
3916         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3917                 rec = get_root_rec(root_cache, root->root_key.objectid);
3918                 BUG_ON(IS_ERR(rec));
3919                 if (btrfs_root_refs(root_item) > 0)
3920                         rec->found_root_item = 1;
3921         }
3922
3923         btrfs_init_path(&path);
3924         memset(&root_node, 0, sizeof(root_node));
3925         cache_tree_init(&root_node.root_cache);
3926         cache_tree_init(&root_node.inode_cache);
3927         memset(&nrefs, 0, sizeof(nrefs));
3928
3929         /* Move the orphan extent record to corresponding inode_record */
3930         list_for_each_entry_safe(orphan, tmp,
3931                                  &root->orphan_data_extents, list) {
3932                 struct inode_record *inode;
3933
3934                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3935                                       1);
3936                 BUG_ON(IS_ERR(inode));
3937                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3938                 list_move(&orphan->list, &inode->orphan_extents);
3939         }
3940
3941         level = btrfs_header_level(root->node);
3942         memset(wc->nodes, 0, sizeof(wc->nodes));
3943         wc->nodes[level] = &root_node;
3944         wc->active_node = level;
3945         wc->root_level = level;
3946
3947         /* We may not have checked the root block, lets do that now */
3948         if (btrfs_is_leaf(root->node))
3949                 status = btrfs_check_leaf(root, NULL, root->node);
3950         else
3951                 status = btrfs_check_node(root, NULL, root->node);
3952         if (status != BTRFS_TREE_BLOCK_CLEAN)
3953                 return -EIO;
3954
3955         if (btrfs_root_refs(root_item) > 0 ||
3956             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3957                 path.nodes[level] = root->node;
3958                 extent_buffer_get(root->node);
3959                 path.slots[level] = 0;
3960         } else {
3961                 struct btrfs_key key;
3962                 struct btrfs_disk_key found_key;
3963
3964                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3965                 level = root_item->drop_level;
3966                 path.lowest_level = level;
3967                 if (level > btrfs_header_level(root->node) ||
3968                     level >= BTRFS_MAX_LEVEL) {
3969                         error("ignoring invalid drop level: %u", level);
3970                         goto skip_walking;
3971                 }
3972                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3973                 if (wret < 0)
3974                         goto skip_walking;
3975                 btrfs_node_key(path.nodes[level], &found_key,
3976                                 path.slots[level]);
3977                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3978                                         sizeof(found_key)));
3979         }
3980
3981         while (1) {
3982                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3983                 if (wret < 0)
3984                         ret = wret;
3985                 if (wret != 0)
3986                         break;
3987
3988                 wret = walk_up_tree(root, &path, wc, &level);
3989                 if (wret < 0)
3990                         ret = wret;
3991                 if (wret != 0)
3992                         break;
3993         }
3994 skip_walking:
3995         btrfs_release_path(&path);
3996
3997         if (!cache_tree_empty(&corrupt_blocks)) {
3998                 struct cache_extent *cache;
3999                 struct btrfs_corrupt_block *corrupt;
4000
4001                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4002                        root->root_key.objectid);
4003                 cache = first_cache_extent(&corrupt_blocks);
4004                 while (cache) {
4005                         corrupt = container_of(cache,
4006                                                struct btrfs_corrupt_block,
4007                                                cache);
4008                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4009                                cache->start, corrupt->level,
4010                                corrupt->key.objectid, corrupt->key.type,
4011                                corrupt->key.offset);
4012                         cache = next_cache_extent(cache);
4013                 }
4014                 if (repair) {
4015                         printf("Try to repair the btree for root %llu\n",
4016                                root->root_key.objectid);
4017                         ret = repair_btree(root, &corrupt_blocks);
4018                         if (ret < 0)
4019                                 fprintf(stderr, "Failed to repair btree: %s\n",
4020                                         strerror(-ret));
4021                         if (!ret)
4022                                 printf("Btree for root %llu is fixed\n",
4023                                        root->root_key.objectid);
4024                 }
4025         }
4026
4027         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4028         if (err < 0)
4029                 ret = err;
4030
4031         if (root_node.current) {
4032                 root_node.current->checked = 1;
4033                 maybe_free_inode_rec(&root_node.inode_cache,
4034                                 root_node.current);
4035         }
4036
4037         err = check_inode_recs(root, &root_node.inode_cache);
4038         if (!ret)
4039                 ret = err;
4040
4041         free_corrupt_blocks_tree(&corrupt_blocks);
4042         root->fs_info->corrupt_blocks = NULL;
4043         free_orphan_data_extents(&root->orphan_data_extents);
4044         return ret;
4045 }
4046
4047 static int fs_root_objectid(u64 objectid)
4048 {
4049         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4050             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4051                 return 1;
4052         return is_fstree(objectid);
4053 }
4054
4055 static int check_fs_roots(struct btrfs_root *root,
4056                           struct cache_tree *root_cache)
4057 {
4058         struct btrfs_path path;
4059         struct btrfs_key key;
4060         struct walk_control wc;
4061         struct extent_buffer *leaf, *tree_node;
4062         struct btrfs_root *tmp_root;
4063         struct btrfs_root *tree_root = root->fs_info->tree_root;
4064         int ret;
4065         int err = 0;
4066
4067         if (ctx.progress_enabled) {
4068                 ctx.tp = TASK_FS_ROOTS;
4069                 task_start(ctx.info);
4070         }
4071
4072         /*
4073          * Just in case we made any changes to the extent tree that weren't
4074          * reflected into the free space cache yet.
4075          */
4076         if (repair)
4077                 reset_cached_block_groups(root->fs_info);
4078         memset(&wc, 0, sizeof(wc));
4079         cache_tree_init(&wc.shared);
4080         btrfs_init_path(&path);
4081
4082 again:
4083         key.offset = 0;
4084         key.objectid = 0;
4085         key.type = BTRFS_ROOT_ITEM_KEY;
4086         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4087         if (ret < 0) {
4088                 err = 1;
4089                 goto out;
4090         }
4091         tree_node = tree_root->node;
4092         while (1) {
4093                 if (tree_node != tree_root->node) {
4094                         free_root_recs_tree(root_cache);
4095                         btrfs_release_path(&path);
4096                         goto again;
4097                 }
4098                 leaf = path.nodes[0];
4099                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4100                         ret = btrfs_next_leaf(tree_root, &path);
4101                         if (ret) {
4102                                 if (ret < 0)
4103                                         err = 1;
4104                                 break;
4105                         }
4106                         leaf = path.nodes[0];
4107                 }
4108                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4109                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4110                     fs_root_objectid(key.objectid)) {
4111                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4112                                 tmp_root = btrfs_read_fs_root_no_cache(
4113                                                 root->fs_info, &key);
4114                         } else {
4115                                 key.offset = (u64)-1;
4116                                 tmp_root = btrfs_read_fs_root(
4117                                                 root->fs_info, &key);
4118                         }
4119                         if (IS_ERR(tmp_root)) {
4120                                 err = 1;
4121                                 goto next;
4122                         }
4123                         ret = check_fs_root(tmp_root, root_cache, &wc);
4124                         if (ret == -EAGAIN) {
4125                                 free_root_recs_tree(root_cache);
4126                                 btrfs_release_path(&path);
4127                                 goto again;
4128                         }
4129                         if (ret)
4130                                 err = 1;
4131                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4132                                 btrfs_free_fs_root(tmp_root);
4133                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4134                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4135                         process_root_ref(leaf, path.slots[0], &key,
4136                                          root_cache);
4137                 }
4138 next:
4139                 path.slots[0]++;
4140         }
4141 out:
4142         btrfs_release_path(&path);
4143         if (err)
4144                 free_extent_cache_tree(&wc.shared);
4145         if (!cache_tree_empty(&wc.shared))
4146                 fprintf(stderr, "warning line %d\n", __LINE__);
4147
4148         task_stop(ctx.info);
4149
4150         return err;
4151 }
4152
4153 /*
4154  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4155  * INODE_REF/INODE_EXTREF match.
4156  *
4157  * @root:       the root of the fs/file tree
4158  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4159  * @key:        the key of the DIR_ITEM/DIR_INDEX
4160  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4161  *              distinguish root_dir between normal dir/file
4162  * @name:       the name in the INODE_REF/INODE_EXTREF
4163  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4164  * @mode:       the st_mode of INODE_ITEM
4165  *
4166  * Return 0 if no error occurred.
4167  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4168  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4169  * dir/file.
4170  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4171  * not match for normal dir/file.
4172  */
4173 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4174                          struct btrfs_key *key, u64 index, char *name,
4175                          u32 namelen, u32 mode)
4176 {
4177         struct btrfs_path path;
4178         struct extent_buffer *node;
4179         struct btrfs_dir_item *di;
4180         struct btrfs_key location;
4181         char namebuf[BTRFS_NAME_LEN] = {0};
4182         u32 total;
4183         u32 cur = 0;
4184         u32 len;
4185         u32 name_len;
4186         u32 data_len;
4187         u8 filetype;
4188         int slot;
4189         int ret;
4190
4191         btrfs_init_path(&path);
4192         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4193         if (ret < 0) {
4194                 ret = DIR_ITEM_MISSING;
4195                 goto out;
4196         }
4197
4198         /* Process root dir and goto out*/
4199         if (index == 0) {
4200                 if (ret == 0) {
4201                         ret = ROOT_DIR_ERROR;
4202                         error(
4203                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4204                                 root->objectid,
4205                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4206                                         "REF" : "EXTREF",
4207                                 ref_key->objectid, ref_key->offset,
4208                                 key->type == BTRFS_DIR_ITEM_KEY ?
4209                                         "DIR_ITEM" : "DIR_INDEX");
4210                 } else {
4211                         ret = 0;
4212                 }
4213
4214                 goto out;
4215         }
4216
4217         /* Process normal file/dir */
4218         if (ret > 0) {
4219                 ret = DIR_ITEM_MISSING;
4220                 error(
4221                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4222                         root->objectid,
4223                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4224                         ref_key->objectid, ref_key->offset,
4225                         key->type == BTRFS_DIR_ITEM_KEY ?
4226                                 "DIR_ITEM" : "DIR_INDEX",
4227                         key->objectid, key->offset, namelen, name,
4228                         imode_to_type(mode));
4229                 goto out;
4230         }
4231
4232         /* Check whether inode_id/filetype/name match */
4233         node = path.nodes[0];
4234         slot = path.slots[0];
4235         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4236         total = btrfs_item_size_nr(node, slot);
4237         while (cur < total) {
4238                 ret = DIR_ITEM_MISMATCH;
4239                 name_len = btrfs_dir_name_len(node, di);
4240                 data_len = btrfs_dir_data_len(node, di);
4241
4242                 btrfs_dir_item_key_to_cpu(node, di, &location);
4243                 if (location.objectid != ref_key->objectid ||
4244                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4245                     location.offset != 0)
4246                         goto next;
4247
4248                 filetype = btrfs_dir_type(node, di);
4249                 if (imode_to_type(mode) != filetype)
4250                         goto next;
4251
4252                 if (cur + sizeof(*di) + name_len > total ||
4253                     name_len > BTRFS_NAME_LEN) {
4254                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4255                                 root->objectid,
4256                                 key->type == BTRFS_DIR_ITEM_KEY ?
4257                                 "DIR_ITEM" : "DIR_INDEX",
4258                                 key->objectid, key->offset, name_len);
4259
4260                         if (cur + sizeof(*di) > total)
4261                                 break;
4262                         len = min_t(u32, total - cur - sizeof(*di),
4263                                     BTRFS_NAME_LEN);
4264                 } else {
4265                         len = name_len;
4266                 }
4267
4268                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4269                 if (len != namelen || strncmp(namebuf, name, len))
4270                         goto next;
4271
4272                 ret = 0;
4273                 goto out;
4274 next:
4275                 len = sizeof(*di) + name_len + data_len;
4276                 di = (struct btrfs_dir_item *)((char *)di + len);
4277                 cur += len;
4278         }
4279         if (ret == DIR_ITEM_MISMATCH)
4280                 error(
4281                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4282                         root->objectid,
4283                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4284                         ref_key->objectid, ref_key->offset,
4285                         key->type == BTRFS_DIR_ITEM_KEY ?
4286                                 "DIR_ITEM" : "DIR_INDEX",
4287                         key->objectid, key->offset, namelen, name,
4288                         imode_to_type(mode));
4289 out:
4290         btrfs_release_path(&path);
4291         return ret;
4292 }
4293
4294 /*
4295  * Traverse the given INODE_REF and call find_dir_item() to find related
4296  * DIR_ITEM/DIR_INDEX.
4297  *
4298  * @root:       the root of the fs/file tree
4299  * @ref_key:    the key of the INODE_REF
4300  * @refs:       the count of INODE_REF
4301  * @mode:       the st_mode of INODE_ITEM
4302  *
4303  * Return 0 if no error occurred.
4304  */
4305 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4306                            struct extent_buffer *node, int slot, u64 *refs,
4307                            int mode)
4308 {
4309         struct btrfs_key key;
4310         struct btrfs_inode_ref *ref;
4311         char namebuf[BTRFS_NAME_LEN] = {0};
4312         u32 total;
4313         u32 cur = 0;
4314         u32 len;
4315         u32 name_len;
4316         u64 index;
4317         int ret, err = 0;
4318
4319         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4320         total = btrfs_item_size_nr(node, slot);
4321
4322 next:
4323         /* Update inode ref count */
4324         (*refs)++;
4325
4326         index = btrfs_inode_ref_index(node, ref);
4327         name_len = btrfs_inode_ref_name_len(node, ref);
4328         if (cur + sizeof(*ref) + name_len > total ||
4329             name_len > BTRFS_NAME_LEN) {
4330                 warning("root %llu INODE_REF[%llu %llu] name too long",
4331                         root->objectid, ref_key->objectid, ref_key->offset);
4332
4333                 if (total < cur + sizeof(*ref))
4334                         goto out;
4335                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4336         } else {
4337                 len = name_len;
4338         }
4339
4340         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4341
4342         /* Check root dir ref name */
4343         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4344                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4345                       root->objectid, ref_key->objectid, ref_key->offset,
4346                       namebuf);
4347                 err |= ROOT_DIR_ERROR;
4348         }
4349
4350         /* Find related DIR_INDEX */
4351         key.objectid = ref_key->offset;
4352         key.type = BTRFS_DIR_INDEX_KEY;
4353         key.offset = index;
4354         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4355         err |= ret;
4356
4357         /* Find related dir_item */
4358         key.objectid = ref_key->offset;
4359         key.type = BTRFS_DIR_ITEM_KEY;
4360         key.offset = btrfs_name_hash(namebuf, len);
4361         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4362         err |= ret;
4363
4364         len = sizeof(*ref) + name_len;
4365         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4366         cur += len;
4367         if (cur < total)
4368                 goto next;
4369
4370 out:
4371         return err;
4372 }
4373
4374 /*
4375  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4376  * DIR_ITEM/DIR_INDEX.
4377  *
4378  * @root:       the root of the fs/file tree
4379  * @ref_key:    the key of the INODE_EXTREF
4380  * @refs:       the count of INODE_EXTREF
4381  * @mode:       the st_mode of INODE_ITEM
4382  *
4383  * Return 0 if no error occurred.
4384  */
4385 static int check_inode_extref(struct btrfs_root *root,
4386                               struct btrfs_key *ref_key,
4387                               struct extent_buffer *node, int slot, u64 *refs,
4388                               int mode)
4389 {
4390         struct btrfs_key key;
4391         struct btrfs_inode_extref *extref;
4392         char namebuf[BTRFS_NAME_LEN] = {0};
4393         u32 total;
4394         u32 cur = 0;
4395         u32 len;
4396         u32 name_len;
4397         u64 index;
4398         u64 parent;
4399         int ret;
4400         int err = 0;
4401
4402         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4403         total = btrfs_item_size_nr(node, slot);
4404
4405 next:
4406         /* update inode ref count */
4407         (*refs)++;
4408         name_len = btrfs_inode_extref_name_len(node, extref);
4409         index = btrfs_inode_extref_index(node, extref);
4410         parent = btrfs_inode_extref_parent(node, extref);
4411         if (name_len <= BTRFS_NAME_LEN) {
4412                 len = name_len;
4413         } else {
4414                 len = BTRFS_NAME_LEN;
4415                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4416                         root->objectid, ref_key->objectid, ref_key->offset);
4417         }
4418         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4419
4420         /* Check root dir ref name */
4421         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4422                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4423                       root->objectid, ref_key->objectid, ref_key->offset,
4424                       namebuf);
4425                 err |= ROOT_DIR_ERROR;
4426         }
4427
4428         /* find related dir_index */
4429         key.objectid = parent;
4430         key.type = BTRFS_DIR_INDEX_KEY;
4431         key.offset = index;
4432         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4433         err |= ret;
4434
4435         /* find related dir_item */
4436         key.objectid = parent;
4437         key.type = BTRFS_DIR_ITEM_KEY;
4438         key.offset = btrfs_name_hash(namebuf, len);
4439         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4440         err |= ret;
4441
4442         len = sizeof(*extref) + name_len;
4443         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4444         cur += len;
4445
4446         if (cur < total)
4447                 goto next;
4448
4449         return err;
4450 }
4451
4452 /*
4453  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4454  * DIR_ITEM/DIR_INDEX match.
4455  *
4456  * @root:       the root of the fs/file tree
4457  * @key:        the key of the INODE_REF/INODE_EXTREF
4458  * @name:       the name in the INODE_REF/INODE_EXTREF
4459  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4460  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4461  * to (u64)-1
4462  * @ext_ref:    the EXTENDED_IREF feature
4463  *
4464  * Return 0 if no error occurred.
4465  * Return >0 for error bitmap
4466  */
4467 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4468                           char *name, int namelen, u64 index,
4469                           unsigned int ext_ref)
4470 {
4471         struct btrfs_path path;
4472         struct btrfs_inode_ref *ref;
4473         struct btrfs_inode_extref *extref;
4474         struct extent_buffer *node;
4475         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4476         u32 total;
4477         u32 cur = 0;
4478         u32 len;
4479         u32 ref_namelen;
4480         u64 ref_index;
4481         u64 parent;
4482         u64 dir_id;
4483         int slot;
4484         int ret;
4485
4486         btrfs_init_path(&path);
4487         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4488         if (ret) {
4489                 ret = INODE_REF_MISSING;
4490                 goto extref;
4491         }
4492
4493         node = path.nodes[0];
4494         slot = path.slots[0];
4495
4496         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4497         total = btrfs_item_size_nr(node, slot);
4498
4499         /* Iterate all entry of INODE_REF */
4500         while (cur < total) {
4501                 ret = INODE_REF_MISSING;
4502
4503                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4504                 ref_index = btrfs_inode_ref_index(node, ref);
4505                 if (index != (u64)-1 && index != ref_index)
4506                         goto next_ref;
4507
4508                 if (cur + sizeof(*ref) + ref_namelen > total ||
4509                     ref_namelen > BTRFS_NAME_LEN) {
4510                         warning("root %llu INODE %s[%llu %llu] name too long",
4511                                 root->objectid,
4512                                 key->type == BTRFS_INODE_REF_KEY ?
4513                                         "REF" : "EXTREF",
4514                                 key->objectid, key->offset);
4515
4516                         if (cur + sizeof(*ref) > total)
4517                                 break;
4518                         len = min_t(u32, total - cur - sizeof(*ref),
4519                                     BTRFS_NAME_LEN);
4520                 } else {
4521                         len = ref_namelen;
4522                 }
4523
4524                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4525                                    len);
4526
4527                 if (len != namelen || strncmp(ref_namebuf, name, len))
4528                         goto next_ref;
4529
4530                 ret = 0;
4531                 goto out;
4532 next_ref:
4533                 len = sizeof(*ref) + ref_namelen;
4534                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4535                 cur += len;
4536         }
4537
4538 extref:
4539         /* Skip if not support EXTENDED_IREF feature */
4540         if (!ext_ref)
4541                 goto out;
4542
4543         btrfs_release_path(&path);
4544         btrfs_init_path(&path);
4545
4546         dir_id = key->offset;
4547         key->type = BTRFS_INODE_EXTREF_KEY;
4548         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4549
4550         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4551         if (ret) {
4552                 ret = INODE_REF_MISSING;
4553                 goto out;
4554         }
4555
4556         node = path.nodes[0];
4557         slot = path.slots[0];
4558
4559         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4560         cur = 0;
4561         total = btrfs_item_size_nr(node, slot);
4562
4563         /* Iterate all entry of INODE_EXTREF */
4564         while (cur < total) {
4565                 ret = INODE_REF_MISSING;
4566
4567                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4568                 ref_index = btrfs_inode_extref_index(node, extref);
4569                 parent = btrfs_inode_extref_parent(node, extref);
4570                 if (index != (u64)-1 && index != ref_index)
4571                         goto next_extref;
4572
4573                 if (parent != dir_id)
4574                         goto next_extref;
4575
4576                 if (ref_namelen <= BTRFS_NAME_LEN) {
4577                         len = ref_namelen;
4578                 } else {
4579                         len = BTRFS_NAME_LEN;
4580                         warning("root %llu INODE %s[%llu %llu] name too long",
4581                                 root->objectid,
4582                                 key->type == BTRFS_INODE_REF_KEY ?
4583                                         "REF" : "EXTREF",
4584                                 key->objectid, key->offset);
4585                 }
4586                 read_extent_buffer(node, ref_namebuf,
4587                                    (unsigned long)(extref + 1), len);
4588
4589                 if (len != namelen || strncmp(ref_namebuf, name, len))
4590                         goto next_extref;
4591
4592                 ret = 0;
4593                 goto out;
4594
4595 next_extref:
4596                 len = sizeof(*extref) + ref_namelen;
4597                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4598                 cur += len;
4599
4600         }
4601 out:
4602         btrfs_release_path(&path);
4603         return ret;
4604 }
4605
4606 /*
4607  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4608  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4609  *
4610  * @root:       the root of the fs/file tree
 * @key:        the key of the DIR_ITEM/DIR_INDEX
4612  * @size:       the st_size of the INODE_ITEM
4613  * @ext_ref:    the EXTENDED_IREF feature
4614  *
4615  * Return 0 if no error occurred.
4616  */
4617 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4618                           struct extent_buffer *node, int slot, u64 *size,
4619                           unsigned int ext_ref)
4620 {
4621         struct btrfs_dir_item *di;
4622         struct btrfs_inode_item *ii;
4623         struct btrfs_path path;
4624         struct btrfs_key location;
4625         char namebuf[BTRFS_NAME_LEN] = {0};
4626         u32 total;
4627         u32 cur = 0;
4628         u32 len;
4629         u32 name_len;
4630         u32 data_len;
4631         u8 filetype;
4632         u32 mode;
4633         u64 index;
4634         int ret;
4635         int err = 0;
4636
4637         /*
4638          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4639          * ignore index check.
4640          */
4641         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4642
4643         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4644         total = btrfs_item_size_nr(node, slot);
4645
4646         while (cur < total) {
4647                 data_len = btrfs_dir_data_len(node, di);
4648                 if (data_len)
4649                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4650                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4651                               "DIR_ITEM" : "DIR_INDEX",
4652                               key->objectid, key->offset, data_len);
4653
4654                 name_len = btrfs_dir_name_len(node, di);
4655                 if (cur + sizeof(*di) + name_len > total ||
4656                     name_len > BTRFS_NAME_LEN) {
4657                         warning("root %llu %s[%llu %llu] name too long",
4658                                 root->objectid,
4659                                 key->type == BTRFS_DIR_ITEM_KEY ?
4660                                 "DIR_ITEM" : "DIR_INDEX",
4661                                 key->objectid, key->offset);
4662
4663                         if (cur + sizeof(*di) > total)
4664                                 break;
4665                         len = min_t(u32, total - cur - sizeof(*di),
4666                                     BTRFS_NAME_LEN);
4667                 } else {
4668                         len = name_len;
4669                 }
4670                 (*size) += name_len;
4671
4672                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4673                 filetype = btrfs_dir_type(node, di);
4674
4675                 btrfs_init_path(&path);
4676                 btrfs_dir_item_key_to_cpu(node, di, &location);
4677
4678                 /* Ignore related ROOT_ITEM check */
4679                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4680                         goto next;
4681
4682                 /* Check relative INODE_ITEM(existence/filetype) */
4683                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4684                 if (ret) {
4685                         err |= INODE_ITEM_MISSING;
4686                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4687                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4688                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4689                               key->offset, location.objectid, name_len,
4690                               namebuf, filetype);
4691                         goto next;
4692                 }
4693
4694                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4695                                     struct btrfs_inode_item);
4696                 mode = btrfs_inode_mode(path.nodes[0], ii);
4697
4698                 if (imode_to_type(mode) != filetype) {
4699                         err |= INODE_ITEM_MISMATCH;
4700                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4701                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4702                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4703                               key->offset, name_len, namebuf, filetype);
4704                 }
4705
4706                 /* Check relative INODE_REF/INODE_EXTREF */
4707                 location.type = BTRFS_INODE_REF_KEY;
4708                 location.offset = key->objectid;
4709                 ret = find_inode_ref(root, &location, namebuf, len,
4710                                        index, ext_ref);
4711                 err |= ret;
4712                 if (ret & INODE_REF_MISSING)
4713                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4714                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4715                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4716                               key->offset, name_len, namebuf, filetype);
4717
4718 next:
4719                 btrfs_release_path(&path);
4720                 len = sizeof(*di) + name_len + data_len;
4721                 di = (struct btrfs_dir_item *)((char *)di + len);
4722                 cur += len;
4723
4724                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4725                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4726                               root->objectid, key->objectid, key->offset);
4727                         break;
4728                 }
4729         }
4730
4731         return err;
4732 }
4733
4734 /*
4735  * Check file extent datasum/hole, update the size of the file extents,
4736  * check and update the last offset of the file extent.
4737  *
4738  * @root:       the root of fs/file tree.
4739  * @fkey:       the key of the file extent.
4740  * @nodatasum:  INODE_NODATASUM feature.
4741  * @size:       the sum of all EXTENT_DATA items size for this inode.
4742  * @end:        the offset of the last extent.
4743  *
4744  * Return 0 if no error occurred.
4745  */
4746 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4747                              struct extent_buffer *node, int slot,
4748                              unsigned int nodatasum, u64 *size, u64 *end)
4749 {
4750         struct btrfs_file_extent_item *fi;
4751         u64 disk_bytenr;
4752         u64 disk_num_bytes;
4753         u64 extent_num_bytes;
4754         u64 extent_offset;
4755         u64 csum_found;         /* In byte size, sectorsize aligned */
4756         u64 search_start;       /* Logical range start we search for csum */
4757         u64 search_len;         /* Logical range len we search for csum */
4758         unsigned int extent_type;
4759         unsigned int is_hole;
4760         int compressed = 0;
4761         int ret;
4762         int err = 0;
4763
4764         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4765
4766         /* Check inline extent */
4767         extent_type = btrfs_file_extent_type(node, fi);
4768         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4769                 struct btrfs_item *e = btrfs_item_nr(slot);
4770                 u32 item_inline_len;
4771
4772                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4773                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4774                 compressed = btrfs_file_extent_compression(node, fi);
4775                 if (extent_num_bytes == 0) {
4776                         error(
4777                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4778                                 root->objectid, fkey->objectid, fkey->offset);
4779                         err |= FILE_EXTENT_ERROR;
4780                 }
4781                 if (!compressed && extent_num_bytes != item_inline_len) {
4782                         error(
4783                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4784                                 root->objectid, fkey->objectid, fkey->offset,
4785                                 extent_num_bytes, item_inline_len);
4786                         err |= FILE_EXTENT_ERROR;
4787                 }
4788                 *end += extent_num_bytes;
4789                 *size += extent_num_bytes;
4790                 return err;
4791         }
4792
4793         /* Check extent type */
4794         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4795                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4796                 err |= FILE_EXTENT_ERROR;
4797                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4798                       root->objectid, fkey->objectid, fkey->offset);
4799                 return err;
4800         }
4801
4802         /* Check REG_EXTENT/PREALLOC_EXTENT */
4803         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4804         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4805         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4806         extent_offset = btrfs_file_extent_offset(node, fi);
4807         compressed = btrfs_file_extent_compression(node, fi);
4808         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4809
4810         /*
4811          * Check EXTENT_DATA csum
4812          *
4813          * For plain (uncompressed) extent, we should only check the range
4814          * we're referring to, as it's possible that part of prealloc extent
4815          * has been written, and has csum:
4816          *
4817          * |<--- Original large preallocated extent A ---->|
4818          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4819          *      No csum                         Has csum
4820          *
4821          * For compressed extent, we should check the whole range.
4822          */
4823         if (!compressed) {
4824                 search_start = disk_bytenr + extent_offset;
4825                 search_len = extent_num_bytes;
4826         } else {
4827                 search_start = disk_bytenr;
4828                 search_len = disk_num_bytes;
4829         }
4830         ret = count_csum_range(root, search_start, search_len, &csum_found);
4831         if (csum_found > 0 && nodatasum) {
4832                 err |= ODD_CSUM_ITEM;
4833                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4834                       root->objectid, fkey->objectid, fkey->offset);
4835         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4836                    !is_hole && (ret < 0 || csum_found < search_len)) {
4837                 err |= CSUM_ITEM_MISSING;
4838                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4839                       root->objectid, fkey->objectid, fkey->offset,
4840                       csum_found, search_len);
4841         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4842                 err |= ODD_CSUM_ITEM;
4843                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4844                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4845         }
4846
4847         /* Check EXTENT_DATA hole */
4848         if (!no_holes && *end != fkey->offset) {
4849                 err |= FILE_EXTENT_ERROR;
4850                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4851                       root->objectid, fkey->objectid, fkey->offset);
4852         }
4853
4854         *end += extent_num_bytes;
4855         if (!is_hole)
4856                 *size += extent_num_bytes;
4857
4858         return err;
4859 }
4860
4861 /*
4862  * Check INODE_ITEM and related ITEMs (the same inode number)
4863  * 1. check link count
4864  * 2. check inode ref/extref
4865  * 3. check dir item/index
4866  *
4867  * @ext_ref:    the EXTENDED_IREF feature
4868  *
4869  * Return 0 if no error occurred.
4870  * Return >0 for error or hit the traversal is done(by error bitmap)
4871  */
4872 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4873                             unsigned int ext_ref)
4874 {
4875         struct extent_buffer *node;
4876         struct btrfs_inode_item *ii;
4877         struct btrfs_key key;
4878         u64 inode_id;
4879         u32 mode;
4880         u64 nlink;
4881         u64 nbytes;
4882         u64 isize;
4883         u64 size = 0;
4884         u64 refs = 0;
4885         u64 extent_end = 0;
4886         u64 extent_size = 0;
4887         unsigned int dir;
4888         unsigned int nodatasum;
4889         int slot;
4890         int ret;
4891         int err = 0;
4892
4893         node = path->nodes[0];
4894         slot = path->slots[0];
4895
4896         btrfs_item_key_to_cpu(node, &key, slot);
4897         inode_id = key.objectid;
4898
4899         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4900                 ret = btrfs_next_item(root, path);
4901                 if (ret > 0)
4902                         err |= LAST_ITEM;
4903                 return err;
4904         }
4905
4906         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4907         isize = btrfs_inode_size(node, ii);
4908         nbytes = btrfs_inode_nbytes(node, ii);
4909         mode = btrfs_inode_mode(node, ii);
4910         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4911         nlink = btrfs_inode_nlink(node, ii);
4912         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4913
4914         while (1) {
4915                 ret = btrfs_next_item(root, path);
4916                 if (ret < 0) {
4917                         /* out will fill 'err' rusing current statistics */
4918                         goto out;
4919                 } else if (ret > 0) {
4920                         err |= LAST_ITEM;
4921                         goto out;
4922                 }
4923
4924                 node = path->nodes[0];
4925                 slot = path->slots[0];
4926                 btrfs_item_key_to_cpu(node, &key, slot);
4927                 if (key.objectid != inode_id)
4928                         goto out;
4929
4930                 switch (key.type) {
4931                 case BTRFS_INODE_REF_KEY:
4932                         ret = check_inode_ref(root, &key, node, slot, &refs,
4933                                               mode);
4934                         err |= ret;
4935                         break;
4936                 case BTRFS_INODE_EXTREF_KEY:
4937                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4938                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4939                                         root->objectid, key.objectid,
4940                                         key.offset);
4941                         ret = check_inode_extref(root, &key, node, slot, &refs,
4942                                                  mode);
4943                         err |= ret;
4944                         break;
4945                 case BTRFS_DIR_ITEM_KEY:
4946                 case BTRFS_DIR_INDEX_KEY:
4947                         if (!dir) {
4948                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4949                                         root->objectid, inode_id,
4950                                         imode_to_type(mode), key.objectid,
4951                                         key.offset);
4952                         }
4953                         ret = check_dir_item(root, &key, node, slot, &size,
4954                                              ext_ref);
4955                         err |= ret;
4956                         break;
4957                 case BTRFS_EXTENT_DATA_KEY:
4958                         if (dir) {
4959                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4960                                         root->objectid, inode_id, key.objectid,
4961                                         key.offset);
4962                         }
4963                         ret = check_file_extent(root, &key, node, slot,
4964                                                 nodatasum, &extent_size,
4965                                                 &extent_end);
4966                         err |= ret;
4967                         break;
4968                 case BTRFS_XATTR_ITEM_KEY:
4969                         break;
4970                 default:
4971                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4972                               key.objectid, key.type, key.offset);
4973                 }
4974         }
4975
4976 out:
4977         /* verify INODE_ITEM nlink/isize/nbytes */
4978         if (dir) {
4979                 if (nlink != 1) {
4980                         err |= LINK_COUNT_ERROR;
4981                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4982                               root->objectid, inode_id, nlink);
4983                 }
4984
4985                 /*
4986                  * Just a warning, as dir inode nbytes is just an
4987                  * instructive value.
4988                  */
4989                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4990                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4991                                 root->objectid, inode_id,
4992                                 root->fs_info->nodesize);
4993                 }
4994
4995                 if (isize != size) {
4996                         err |= ISIZE_ERROR;
4997                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4998                               root->objectid, inode_id, isize, size);
4999                 }
5000         } else {
5001                 if (nlink != refs) {
5002                         err |= LINK_COUNT_ERROR;
5003                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5004                               root->objectid, inode_id, nlink, refs);
5005                 } else if (!nlink) {
5006                         err |= ORPHAN_ITEM;
5007                 }
5008
5009                 if (!nbytes && !no_holes && extent_end < isize) {
5010                         err |= NBYTES_ERROR;
5011                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5012                               root->objectid, inode_id, isize);
5013                 }
5014
5015                 if (nbytes != extent_size) {
5016                         err |= NBYTES_ERROR;
5017                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5018                               root->objectid, inode_id, nbytes, extent_size);
5019                 }
5020         }
5021
5022         return err;
5023 }
5024
5025 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5026 {
5027         struct btrfs_path path;
5028         struct btrfs_key key;
5029         int err = 0;
5030         int ret;
5031
5032         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5033         key.type = BTRFS_INODE_ITEM_KEY;
5034         key.offset = 0;
5035
5036         /* For root being dropped, we don't need to check first inode */
5037         if (btrfs_root_refs(&root->root_item) == 0 &&
5038             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5039             key.objectid)
5040                 return 0;
5041
5042         btrfs_init_path(&path);
5043
5044         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5045         if (ret < 0)
5046                 goto out;
5047         if (ret > 0) {
5048                 ret = 0;
5049                 err |= INODE_ITEM_MISSING;
5050                 error("first inode item of root %llu is missing",
5051                       root->objectid);
5052         }
5053
5054         err |= check_inode_item(root, &path, ext_ref);
5055         err &= ~LAST_ITEM;
5056         if (err && !ret)
5057                 ret = -EIO;
5058 out:
5059         btrfs_release_path(&path);
5060         return ret;
5061 }
5062
5063 /*
5064  * Iterate all item on the tree and call check_inode_item() to check.
5065  *
5066  * @root:       the root of the tree to be checked.
5067  * @ext_ref:    the EXTENDED_IREF feature
5068  *
5069  * Return 0 if no error found.
5070  * Return <0 for error.
5071  */
5072 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5073 {
5074         struct btrfs_path path;
5075         struct node_refs nrefs;
5076         struct btrfs_root_item *root_item = &root->root_item;
5077         int ret;
5078         int level;
5079         int err = 0;
5080
5081         /*
5082          * We need to manually check the first inode item(256)
5083          * As the following traversal function will only start from
5084          * the first inode item in the leaf, if inode item(256) is missing
5085          * we will just skip it forever.
5086          */
5087         ret = check_fs_first_inode(root, ext_ref);
5088         if (ret < 0)
5089                 return ret;
5090
5091         memset(&nrefs, 0, sizeof(nrefs));
5092         level = btrfs_header_level(root->node);
5093         btrfs_init_path(&path);
5094
5095         if (btrfs_root_refs(root_item) > 0 ||
5096             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5097                 path.nodes[level] = root->node;
5098                 path.slots[level] = 0;
5099                 extent_buffer_get(root->node);
5100         } else {
5101                 struct btrfs_key key;
5102
5103                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5104                 level = root_item->drop_level;
5105                 path.lowest_level = level;
5106                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5107                 if (ret < 0)
5108                         goto out;
5109                 ret = 0;
5110         }
5111
5112         while (1) {
5113                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5114                 err |= !!ret;
5115
5116                 /* if ret is negative, walk shall stop */
5117                 if (ret < 0) {
5118                         ret = err;
5119                         break;
5120                 }
5121
5122                 ret = walk_up_tree_v2(root, &path, &level);
5123                 if (ret != 0) {
5124                         /* Normal exit, reset ret to err */
5125                         ret = err;
5126                         break;
5127                 }
5128         }
5129
5130 out:
5131         btrfs_release_path(&path);
5132         return ret;
5133 }
5134
5135 /*
5136  * Find the relative ref for root_ref and root_backref.
5137  *
5138  * @root:       the root of the root tree.
5139  * @ref_key:    the key of the root ref.
5140  *
5141  * Return 0 if no error occurred.
5142  */
5143 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5144                           struct extent_buffer *node, int slot)
5145 {
5146         struct btrfs_path path;
5147         struct btrfs_key key;
5148         struct btrfs_root_ref *ref;
5149         struct btrfs_root_ref *backref;
5150         char ref_name[BTRFS_NAME_LEN] = {0};
5151         char backref_name[BTRFS_NAME_LEN] = {0};
5152         u64 ref_dirid;
5153         u64 ref_seq;
5154         u32 ref_namelen;
5155         u64 backref_dirid;
5156         u64 backref_seq;
5157         u32 backref_namelen;
5158         u32 len;
5159         int ret;
5160         int err = 0;
5161
5162         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5163         ref_dirid = btrfs_root_ref_dirid(node, ref);
5164         ref_seq = btrfs_root_ref_sequence(node, ref);
5165         ref_namelen = btrfs_root_ref_name_len(node, ref);
5166
5167         if (ref_namelen <= BTRFS_NAME_LEN) {
5168                 len = ref_namelen;
5169         } else {
5170                 len = BTRFS_NAME_LEN;
5171                 warning("%s[%llu %llu] ref_name too long",
5172                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5173                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5174                         ref_key->offset);
5175         }
5176         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5177
5178         /* Find relative root_ref */
5179         key.objectid = ref_key->offset;
5180         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5181         key.offset = ref_key->objectid;
5182
5183         btrfs_init_path(&path);
5184         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5185         if (ret) {
5186                 err |= ROOT_REF_MISSING;
5187                 error("%s[%llu %llu] couldn't find relative ref",
5188                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5189                       "ROOT_REF" : "ROOT_BACKREF",
5190                       ref_key->objectid, ref_key->offset);
5191                 goto out;
5192         }
5193
5194         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5195                                  struct btrfs_root_ref);
5196         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5197         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5198         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5199
5200         if (backref_namelen <= BTRFS_NAME_LEN) {
5201                 len = backref_namelen;
5202         } else {
5203                 len = BTRFS_NAME_LEN;
5204                 warning("%s[%llu %llu] ref_name too long",
5205                         key.type == BTRFS_ROOT_REF_KEY ?
5206                         "ROOT_REF" : "ROOT_BACKREF",
5207                         key.objectid, key.offset);
5208         }
5209         read_extent_buffer(path.nodes[0], backref_name,
5210                            (unsigned long)(backref + 1), len);
5211
5212         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5213             ref_namelen != backref_namelen ||
5214             strncmp(ref_name, backref_name, len)) {
5215                 err |= ROOT_REF_MISMATCH;
5216                 error("%s[%llu %llu] mismatch relative ref",
5217                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5218                       "ROOT_REF" : "ROOT_BACKREF",
5219                       ref_key->objectid, ref_key->offset);
5220         }
5221 out:
5222         btrfs_release_path(&path);
5223         return err;
5224 }
5225
5226 /*
5227  * Check all fs/file tree in low_memory mode.
5228  *
5229  * 1. for fs tree root item, call check_fs_root_v2()
5230  * 2. for fs tree root ref/backref, call check_root_ref()
5231  *
5232  * Return 0 if no error occurred.
5233  */
5234 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5235 {
5236         struct btrfs_root *tree_root = fs_info->tree_root;
5237         struct btrfs_root *cur_root = NULL;
5238         struct btrfs_path path;
5239         struct btrfs_key key;
5240         struct extent_buffer *node;
5241         unsigned int ext_ref;
5242         int slot;
5243         int ret;
5244         int err = 0;
5245
5246         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5247
5248         btrfs_init_path(&path);
5249         key.objectid = BTRFS_FS_TREE_OBJECTID;
5250         key.offset = 0;
5251         key.type = BTRFS_ROOT_ITEM_KEY;
5252
5253         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5254         if (ret < 0) {
5255                 err = ret;
5256                 goto out;
5257         } else if (ret > 0) {
5258                 err = -ENOENT;
5259                 goto out;
5260         }
5261
5262         while (1) {
5263                 node = path.nodes[0];
5264                 slot = path.slots[0];
5265                 btrfs_item_key_to_cpu(node, &key, slot);
5266                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5267                         goto out;
5268                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5269                     fs_root_objectid(key.objectid)) {
5270                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5271                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5272                                                                        &key);
5273                         } else {
5274                                 key.offset = (u64)-1;
5275                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5276                         }
5277
5278                         if (IS_ERR(cur_root)) {
5279                                 error("Fail to read fs/subvol tree: %lld",
5280                                       key.objectid);
5281                                 err = -EIO;
5282                                 goto next;
5283                         }
5284
5285                         ret = check_fs_root_v2(cur_root, ext_ref);
5286                         err |= ret;
5287
5288                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5289                                 btrfs_free_fs_root(cur_root);
5290                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5291                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5292                         ret = check_root_ref(tree_root, &key, node, slot);
5293                         err |= ret;
5294                 }
5295 next:
5296                 ret = btrfs_next_item(tree_root, &path);
5297                 if (ret > 0)
5298                         goto out;
5299                 if (ret < 0) {
5300                         err = ret;
5301                         goto out;
5302                 }
5303         }
5304
5305 out:
5306         btrfs_release_path(&path);
5307         return err;
5308 }
5309
5310 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5311 {
5312         struct list_head *cur = rec->backrefs.next;
5313         struct extent_backref *back;
5314         struct tree_backref *tback;
5315         struct data_backref *dback;
5316         u64 found = 0;
5317         int err = 0;
5318
5319         while(cur != &rec->backrefs) {
5320                 back = to_extent_backref(cur);
5321                 cur = cur->next;
5322                 if (!back->found_extent_tree) {
5323                         err = 1;
5324                         if (!print_errs)
5325                                 goto out;
5326                         if (back->is_data) {
5327                                 dback = to_data_backref(back);
5328                                 fprintf(stderr, "Backref %llu %s %llu"
5329                                         " owner %llu offset %llu num_refs %lu"
5330                                         " not found in extent tree\n",
5331                                         (unsigned long long)rec->start,
5332                                         back->full_backref ?
5333                                         "parent" : "root",
5334                                         back->full_backref ?
5335                                         (unsigned long long)dback->parent:
5336                                         (unsigned long long)dback->root,
5337                                         (unsigned long long)dback->owner,
5338                                         (unsigned long long)dback->offset,
5339                                         (unsigned long)dback->num_refs);
5340                         } else {
5341                                 tback = to_tree_backref(back);
5342                                 fprintf(stderr, "Backref %llu parent %llu"
5343                                         " root %llu not found in extent tree\n",
5344                                         (unsigned long long)rec->start,
5345                                         (unsigned long long)tback->parent,
5346                                         (unsigned long long)tback->root);
5347                         }
5348                 }
5349                 if (!back->is_data && !back->found_ref) {
5350                         err = 1;
5351                         if (!print_errs)
5352                                 goto out;
5353                         tback = to_tree_backref(back);
5354                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5355                                 (unsigned long long)rec->start,
5356                                 back->full_backref ? "parent" : "root",
5357                                 back->full_backref ?
5358                                 (unsigned long long)tback->parent :
5359                                 (unsigned long long)tback->root, back);
5360                 }
5361                 if (back->is_data) {
5362                         dback = to_data_backref(back);
5363                         if (dback->found_ref != dback->num_refs) {
5364                                 err = 1;
5365                                 if (!print_errs)
5366                                         goto out;
5367                                 fprintf(stderr, "Incorrect local backref count"
5368                                         " on %llu %s %llu owner %llu"
5369                                         " offset %llu found %u wanted %u back %p\n",
5370                                         (unsigned long long)rec->start,
5371                                         back->full_backref ?
5372                                         "parent" : "root",
5373                                         back->full_backref ?
5374                                         (unsigned long long)dback->parent:
5375                                         (unsigned long long)dback->root,
5376                                         (unsigned long long)dback->owner,
5377                                         (unsigned long long)dback->offset,
5378                                         dback->found_ref, dback->num_refs, back);
5379                         }
5380                         if (dback->disk_bytenr != rec->start) {
5381                                 err = 1;
5382                                 if (!print_errs)
5383                                         goto out;
5384                                 fprintf(stderr, "Backref disk bytenr does not"
5385                                         " match extent record, bytenr=%llu, "
5386                                         "ref bytenr=%llu\n",
5387                                         (unsigned long long)rec->start,
5388                                         (unsigned long long)dback->disk_bytenr);
5389                         }
5390
5391                         if (dback->bytes != rec->nr) {
5392                                 err = 1;
5393                                 if (!print_errs)
5394                                         goto out;
5395                                 fprintf(stderr, "Backref bytes do not match "
5396                                         "extent backref, bytenr=%llu, ref "
5397                                         "bytes=%llu, backref bytes=%llu\n",
5398                                         (unsigned long long)rec->start,
5399                                         (unsigned long long)rec->nr,
5400                                         (unsigned long long)dback->bytes);
5401                         }
5402                 }
5403                 if (!back->is_data) {
5404                         found += 1;
5405                 } else {
5406                         dback = to_data_backref(back);
5407                         found += dback->found_ref;
5408                 }
5409         }
5410         if (found != rec->refs) {
5411                 err = 1;
5412                 if (!print_errs)
5413                         goto out;
5414                 fprintf(stderr, "Incorrect global backref count "
5415                         "on %llu found %llu wanted %llu\n",
5416                         (unsigned long long)rec->start,
5417                         (unsigned long long)found,
5418                         (unsigned long long)rec->refs);
5419         }
5420 out:
5421         return err;
5422 }
5423
5424 static int free_all_extent_backrefs(struct extent_record *rec)
5425 {
5426         struct extent_backref *back;
5427         struct list_head *cur;
5428         while (!list_empty(&rec->backrefs)) {
5429                 cur = rec->backrefs.next;
5430                 back = to_extent_backref(cur);
5431                 list_del(cur);
5432                 free(back);
5433         }
5434         return 0;
5435 }
5436
5437 static void free_extent_record_cache(struct cache_tree *extent_cache)
5438 {
5439         struct cache_extent *cache;
5440         struct extent_record *rec;
5441
5442         while (1) {
5443                 cache = first_cache_extent(extent_cache);
5444                 if (!cache)
5445                         break;
5446                 rec = container_of(cache, struct extent_record, cache);
5447                 remove_cache_extent(extent_cache, cache);
5448                 free_all_extent_backrefs(rec);
5449                 free(rec);
5450         }
5451 }
5452
5453 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5454                                  struct extent_record *rec)
5455 {
5456         if (rec->content_checked && rec->owner_ref_checked &&
5457             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5458             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5459             !rec->bad_full_backref && !rec->crossing_stripes &&
5460             !rec->wrong_chunk_type) {
5461                 remove_cache_extent(extent_cache, &rec->cache);
5462                 free_all_extent_backrefs(rec);
5463                 list_del_init(&rec->list);
5464                 free(rec);
5465         }
5466         return 0;
5467 }
5468
5469 static int check_owner_ref(struct btrfs_root *root,
5470                             struct extent_record *rec,
5471                             struct extent_buffer *buf)
5472 {
5473         struct extent_backref *node;
5474         struct tree_backref *back;
5475         struct btrfs_root *ref_root;
5476         struct btrfs_key key;
5477         struct btrfs_path path;
5478         struct extent_buffer *parent;
5479         int level;
5480         int found = 0;
5481         int ret;
5482
5483         list_for_each_entry(node, &rec->backrefs, list) {
5484                 if (node->is_data)
5485                         continue;
5486                 if (!node->found_ref)
5487                         continue;
5488                 if (node->full_backref)
5489                         continue;
5490                 back = to_tree_backref(node);
5491                 if (btrfs_header_owner(buf) == back->root)
5492                         return 0;
5493         }
5494         BUG_ON(rec->is_root);
5495
5496         /* try to find the block by search corresponding fs tree */
5497         key.objectid = btrfs_header_owner(buf);
5498         key.type = BTRFS_ROOT_ITEM_KEY;
5499         key.offset = (u64)-1;
5500
5501         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5502         if (IS_ERR(ref_root))
5503                 return 1;
5504
5505         level = btrfs_header_level(buf);
5506         if (level == 0)
5507                 btrfs_item_key_to_cpu(buf, &key, 0);
5508         else
5509                 btrfs_node_key_to_cpu(buf, &key, 0);
5510
5511         btrfs_init_path(&path);
5512         path.lowest_level = level + 1;
5513         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5514         if (ret < 0)
5515                 return 0;
5516
5517         parent = path.nodes[level + 1];
5518         if (parent && buf->start == btrfs_node_blockptr(parent,
5519                                                         path.slots[level + 1]))
5520                 found = 1;
5521
5522         btrfs_release_path(&path);
5523         return found ? 0 : 1;
5524 }
5525
5526 static int is_extent_tree_record(struct extent_record *rec)
5527 {
5528         struct list_head *cur = rec->backrefs.next;
5529         struct extent_backref *node;
5530         struct tree_backref *back;
5531         int is_extent = 0;
5532
5533         while(cur != &rec->backrefs) {
5534                 node = to_extent_backref(cur);
5535                 cur = cur->next;
5536                 if (node->is_data)
5537                         return 0;
5538                 back = to_tree_backref(node);
5539                 if (node->full_backref)
5540                         return 0;
5541                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5542                         is_extent = 1;
5543         }
5544         return is_extent;
5545 }
5546
5547
5548 static int record_bad_block_io(struct btrfs_fs_info *info,
5549                                struct cache_tree *extent_cache,
5550                                u64 start, u64 len)
5551 {
5552         struct extent_record *rec;
5553         struct cache_extent *cache;
5554         struct btrfs_key key;
5555
5556         cache = lookup_cache_extent(extent_cache, start, len);
5557         if (!cache)
5558                 return 0;
5559
5560         rec = container_of(cache, struct extent_record, cache);
5561         if (!is_extent_tree_record(rec))
5562                 return 0;
5563
5564         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5565         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5566 }
5567
5568 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5569                        struct extent_buffer *buf, int slot)
5570 {
5571         if (btrfs_header_level(buf)) {
5572                 struct btrfs_key_ptr ptr1, ptr2;
5573
5574                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5575                                    sizeof(struct btrfs_key_ptr));
5576                 read_extent_buffer(buf, &ptr2,
5577                                    btrfs_node_key_ptr_offset(slot + 1),
5578                                    sizeof(struct btrfs_key_ptr));
5579                 write_extent_buffer(buf, &ptr1,
5580                                     btrfs_node_key_ptr_offset(slot + 1),
5581                                     sizeof(struct btrfs_key_ptr));
5582                 write_extent_buffer(buf, &ptr2,
5583                                     btrfs_node_key_ptr_offset(slot),
5584                                     sizeof(struct btrfs_key_ptr));
5585                 if (slot == 0) {
5586                         struct btrfs_disk_key key;
5587                         btrfs_node_key(buf, &key, 0);
5588                         btrfs_fixup_low_keys(root, path, &key,
5589                                              btrfs_header_level(buf) + 1);
5590                 }
5591         } else {
5592                 struct btrfs_item *item1, *item2;
5593                 struct btrfs_key k1, k2;
5594                 char *item1_data, *item2_data;
5595                 u32 item1_offset, item2_offset, item1_size, item2_size;
5596
5597                 item1 = btrfs_item_nr(slot);
5598                 item2 = btrfs_item_nr(slot + 1);
5599                 btrfs_item_key_to_cpu(buf, &k1, slot);
5600                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5601                 item1_offset = btrfs_item_offset(buf, item1);
5602                 item2_offset = btrfs_item_offset(buf, item2);
5603                 item1_size = btrfs_item_size(buf, item1);
5604                 item2_size = btrfs_item_size(buf, item2);
5605
5606                 item1_data = malloc(item1_size);
5607                 if (!item1_data)
5608                         return -ENOMEM;
5609                 item2_data = malloc(item2_size);
5610                 if (!item2_data) {
5611                         free(item1_data);
5612                         return -ENOMEM;
5613                 }
5614
5615                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5616                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5617
5618                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5619                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5620                 free(item1_data);
5621                 free(item2_data);
5622
5623                 btrfs_set_item_offset(buf, item1, item2_offset);
5624                 btrfs_set_item_offset(buf, item2, item1_offset);
5625                 btrfs_set_item_size(buf, item1, item2_size);
5626                 btrfs_set_item_size(buf, item2, item1_size);
5627
5628                 path->slots[0] = slot;
5629                 btrfs_set_item_key_unsafe(root, path, &k2);
5630                 path->slots[0] = slot + 1;
5631                 btrfs_set_item_key_unsafe(root, path, &k1);
5632         }
5633         return 0;
5634 }
5635
5636 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5637 {
5638         struct extent_buffer *buf;
5639         struct btrfs_key k1, k2;
5640         int i;
5641         int level = path->lowest_level;
5642         int ret = -EIO;
5643
5644         buf = path->nodes[level];
5645         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5646                 if (level) {
5647                         btrfs_node_key_to_cpu(buf, &k1, i);
5648                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5649                 } else {
5650                         btrfs_item_key_to_cpu(buf, &k1, i);
5651                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5652                 }
5653                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5654                         continue;
5655                 ret = swap_values(root, path, buf, i);
5656                 if (ret)
5657                         break;
5658                 btrfs_mark_buffer_dirty(buf);
5659                 i = 0;
5660         }
5661         return ret;
5662 }
5663
5664 static int delete_bogus_item(struct btrfs_root *root,
5665                              struct btrfs_path *path,
5666                              struct extent_buffer *buf, int slot)
5667 {
5668         struct btrfs_key key;
5669         int nritems = btrfs_header_nritems(buf);
5670
5671         btrfs_item_key_to_cpu(buf, &key, slot);
5672
5673         /* These are all the keys we can deal with missing. */
5674         if (key.type != BTRFS_DIR_INDEX_KEY &&
5675             key.type != BTRFS_EXTENT_ITEM_KEY &&
5676             key.type != BTRFS_METADATA_ITEM_KEY &&
5677             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5678             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5679                 return -1;
5680
5681         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5682                (unsigned long long)key.objectid, key.type,
5683                (unsigned long long)key.offset, slot, buf->start);
5684         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5685                               btrfs_item_nr_offset(slot + 1),
5686                               sizeof(struct btrfs_item) *
5687                               (nritems - slot - 1));
5688         btrfs_set_header_nritems(buf, nritems - 1);
5689         if (slot == 0) {
5690                 struct btrfs_disk_key disk_key;
5691
5692                 btrfs_item_key(buf, &disk_key, 0);
5693                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5694         }
5695         btrfs_mark_buffer_dirty(buf);
5696         return 0;
5697 }
5698
5699 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5700 {
5701         struct extent_buffer *buf;
5702         int i;
5703         int ret = 0;
5704
5705         /* We should only get this for leaves */
5706         BUG_ON(path->lowest_level);
5707         buf = path->nodes[0];
5708 again:
5709         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5710                 unsigned int shift = 0, offset;
5711
5712                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5713                     BTRFS_LEAF_DATA_SIZE(root)) {
5714                         if (btrfs_item_end_nr(buf, i) >
5715                             BTRFS_LEAF_DATA_SIZE(root)) {
5716                                 ret = delete_bogus_item(root, path, buf, i);
5717                                 if (!ret)
5718                                         goto again;
5719                                 fprintf(stderr, "item is off the end of the "
5720                                         "leaf, can't fix\n");
5721                                 ret = -EIO;
5722                                 break;
5723                         }
5724                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5725                                 btrfs_item_end_nr(buf, i);
5726                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5727                            btrfs_item_offset_nr(buf, i - 1)) {
5728                         if (btrfs_item_end_nr(buf, i) >
5729                             btrfs_item_offset_nr(buf, i - 1)) {
5730                                 ret = delete_bogus_item(root, path, buf, i);
5731                                 if (!ret)
5732                                         goto again;
5733                                 fprintf(stderr, "items overlap, can't fix\n");
5734                                 ret = -EIO;
5735                                 break;
5736                         }
5737                         shift = btrfs_item_offset_nr(buf, i - 1) -
5738                                 btrfs_item_end_nr(buf, i);
5739                 }
5740                 if (!shift)
5741                         continue;
5742
5743                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5744                        i, shift, (unsigned long long)buf->start);
5745                 offset = btrfs_item_offset_nr(buf, i);
5746                 memmove_extent_buffer(buf,
5747                                       btrfs_leaf_data(buf) + offset + shift,
5748                                       btrfs_leaf_data(buf) + offset,
5749                                       btrfs_item_size_nr(buf, i));
5750                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5751                                       offset + shift);
5752                 btrfs_mark_buffer_dirty(buf);
5753         }
5754
5755         /*
5756          * We may have moved things, in which case we want to exit so we don't
5757          * write those changes out.  Once we have proper abort functionality in
5758          * progs this can be changed to something nicer.
5759          */
5760         BUG_ON(ret);
5761         return ret;
5762 }
5763
5764 /*
5765  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5766  * then just return -EIO.
5767  */
5768 static int try_to_fix_bad_block(struct btrfs_root *root,
5769                                 struct extent_buffer *buf,
5770                                 enum btrfs_tree_block_status status)
5771 {
5772         struct btrfs_trans_handle *trans;
5773         struct ulist *roots;
5774         struct ulist_node *node;
5775         struct btrfs_root *search_root;
5776         struct btrfs_path path;
5777         struct ulist_iterator iter;
5778         struct btrfs_key root_key, key;
5779         int ret;
5780
5781         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5782             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5783                 return -EIO;
5784
5785         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5786         if (ret)
5787                 return -EIO;
5788
5789         btrfs_init_path(&path);
5790         ULIST_ITER_INIT(&iter);
5791         while ((node = ulist_next(roots, &iter))) {
5792                 root_key.objectid = node->val;
5793                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5794                 root_key.offset = (u64)-1;
5795
5796                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5797                 if (IS_ERR(root)) {
5798                         ret = -EIO;
5799                         break;
5800                 }
5801
5802
5803                 trans = btrfs_start_transaction(search_root, 0);
5804                 if (IS_ERR(trans)) {
5805                         ret = PTR_ERR(trans);
5806                         break;
5807                 }
5808
5809                 path.lowest_level = btrfs_header_level(buf);
5810                 path.skip_check_block = 1;
5811                 if (path.lowest_level)
5812                         btrfs_node_key_to_cpu(buf, &key, 0);
5813                 else
5814                         btrfs_item_key_to_cpu(buf, &key, 0);
5815                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5816                 if (ret) {
5817                         ret = -EIO;
5818                         btrfs_commit_transaction(trans, search_root);
5819                         break;
5820                 }
5821                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5822                         ret = fix_key_order(search_root, &path);
5823                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5824                         ret = fix_item_offset(search_root, &path);
5825                 if (ret) {
5826                         btrfs_commit_transaction(trans, search_root);
5827                         break;
5828                 }
5829                 btrfs_release_path(&path);
5830                 btrfs_commit_transaction(trans, search_root);
5831         }
5832         ulist_free(roots);
5833         btrfs_release_path(&path);
5834         return ret;
5835 }
5836
5837 static int check_block(struct btrfs_root *root,
5838                        struct cache_tree *extent_cache,
5839                        struct extent_buffer *buf, u64 flags)
5840 {
5841         struct extent_record *rec;
5842         struct cache_extent *cache;
5843         struct btrfs_key key;
5844         enum btrfs_tree_block_status status;
5845         int ret = 0;
5846         int level;
5847
5848         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5849         if (!cache)
5850                 return 1;
5851         rec = container_of(cache, struct extent_record, cache);
5852         rec->generation = btrfs_header_generation(buf);
5853
5854         level = btrfs_header_level(buf);
5855         if (btrfs_header_nritems(buf) > 0) {
5856
5857                 if (level == 0)
5858                         btrfs_item_key_to_cpu(buf, &key, 0);
5859                 else
5860                         btrfs_node_key_to_cpu(buf, &key, 0);
5861
5862                 rec->info_objectid = key.objectid;
5863         }
5864         rec->info_level = level;
5865
5866         if (btrfs_is_leaf(buf))
5867                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5868         else
5869                 status = btrfs_check_node(root, &rec->parent_key, buf);
5870
5871         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5872                 if (repair)
5873                         status = try_to_fix_bad_block(root, buf, status);
5874                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5875                         ret = -EIO;
5876                         fprintf(stderr, "bad block %llu\n",
5877                                 (unsigned long long)buf->start);
5878                 } else {
5879                         /*
5880                          * Signal to callers we need to start the scan over
5881                          * again since we'll have cowed blocks.
5882                          */
5883                         ret = -EAGAIN;
5884                 }
5885         } else {
5886                 rec->content_checked = 1;
5887                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5888                         rec->owner_ref_checked = 1;
5889                 else {
5890                         ret = check_owner_ref(root, rec, buf);
5891                         if (!ret)
5892                                 rec->owner_ref_checked = 1;
5893                 }
5894         }
5895         if (!ret)
5896                 maybe_free_extent_rec(extent_cache, rec);
5897         return ret;
5898 }
5899
5900 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5901                                                 u64 parent, u64 root)
5902 {
5903         struct list_head *cur = rec->backrefs.next;
5904         struct extent_backref *node;
5905         struct tree_backref *back;
5906
5907         while(cur != &rec->backrefs) {
5908                 node = to_extent_backref(cur);
5909                 cur = cur->next;
5910                 if (node->is_data)
5911                         continue;
5912                 back = to_tree_backref(node);
5913                 if (parent > 0) {
5914                         if (!node->full_backref)
5915                                 continue;
5916                         if (parent == back->parent)
5917                                 return back;
5918                 } else {
5919                         if (node->full_backref)
5920                                 continue;
5921                         if (back->root == root)
5922                                 return back;
5923                 }
5924         }
5925         return NULL;
5926 }
5927
5928 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5929                                                 u64 parent, u64 root)
5930 {
5931         struct tree_backref *ref = malloc(sizeof(*ref));
5932
5933         if (!ref)
5934                 return NULL;
5935         memset(&ref->node, 0, sizeof(ref->node));
5936         if (parent > 0) {
5937                 ref->parent = parent;
5938                 ref->node.full_backref = 1;
5939         } else {
5940                 ref->root = root;
5941                 ref->node.full_backref = 0;
5942         }
5943         list_add_tail(&ref->node.list, &rec->backrefs);
5944
5945         return ref;
5946 }
5947
5948 static struct data_backref *find_data_backref(struct extent_record *rec,
5949                                                 u64 parent, u64 root,
5950                                                 u64 owner, u64 offset,
5951                                                 int found_ref,
5952                                                 u64 disk_bytenr, u64 bytes)
5953 {
5954         struct list_head *cur = rec->backrefs.next;
5955         struct extent_backref *node;
5956         struct data_backref *back;
5957
5958         while(cur != &rec->backrefs) {
5959                 node = to_extent_backref(cur);
5960                 cur = cur->next;
5961                 if (!node->is_data)
5962                         continue;
5963                 back = to_data_backref(node);
5964                 if (parent > 0) {
5965                         if (!node->full_backref)
5966                                 continue;
5967                         if (parent == back->parent)
5968                                 return back;
5969                 } else {
5970                         if (node->full_backref)
5971                                 continue;
5972                         if (back->root == root && back->owner == owner &&
5973                             back->offset == offset) {
5974                                 if (found_ref && node->found_ref &&
5975                                     (back->bytes != bytes ||
5976                                     back->disk_bytenr != disk_bytenr))
5977                                         continue;
5978                                 return back;
5979                         }
5980                 }
5981         }
5982         return NULL;
5983 }
5984
5985 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5986                                                 u64 parent, u64 root,
5987                                                 u64 owner, u64 offset,
5988                                                 u64 max_size)
5989 {
5990         struct data_backref *ref = malloc(sizeof(*ref));
5991
5992         if (!ref)
5993                 return NULL;
5994         memset(&ref->node, 0, sizeof(ref->node));
5995         ref->node.is_data = 1;
5996
5997         if (parent > 0) {
5998                 ref->parent = parent;
5999                 ref->owner = 0;
6000                 ref->offset = 0;
6001                 ref->node.full_backref = 1;
6002         } else {
6003                 ref->root = root;
6004                 ref->owner = owner;
6005                 ref->offset = offset;
6006                 ref->node.full_backref = 0;
6007         }
6008         ref->bytes = max_size;
6009         ref->found_ref = 0;
6010         ref->num_refs = 0;
6011         list_add_tail(&ref->node.list, &rec->backrefs);
6012         if (max_size > rec->max_size)
6013                 rec->max_size = max_size;
6014         return ref;
6015 }
6016
6017 /* Check if the type of extent matches with its chunk */
6018 static void check_extent_type(struct extent_record *rec)
6019 {
6020         struct btrfs_block_group_cache *bg_cache;
6021
6022         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6023         if (!bg_cache)
6024                 return;
6025
6026         /* data extent, check chunk directly*/
6027         if (!rec->metadata) {
6028                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6029                         rec->wrong_chunk_type = 1;
6030                 return;
6031         }
6032
6033         /* metadata extent, check the obvious case first */
6034         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6035                                  BTRFS_BLOCK_GROUP_METADATA))) {
6036                 rec->wrong_chunk_type = 1;
6037                 return;
6038         }
6039
6040         /*
6041          * Check SYSTEM extent, as it's also marked as metadata, we can only
6042          * make sure it's a SYSTEM extent by its backref
6043          */
6044         if (!list_empty(&rec->backrefs)) {
6045                 struct extent_backref *node;
6046                 struct tree_backref *tback;
6047                 u64 bg_type;
6048
6049                 node = to_extent_backref(rec->backrefs.next);
6050                 if (node->is_data) {
6051                         /* tree block shouldn't have data backref */
6052                         rec->wrong_chunk_type = 1;
6053                         return;
6054                 }
6055                 tback = container_of(node, struct tree_backref, node);
6056
6057                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6058                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6059                 else
6060                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6061                 if (!(bg_cache->flags & bg_type))
6062                         rec->wrong_chunk_type = 1;
6063         }
6064 }
6065
6066 /*
6067  * Allocate a new extent record, fill default values from @tmpl and insert int
6068  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6069  * the cache, otherwise it fails.
6070  */
6071 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6072                 struct extent_record *tmpl)
6073 {
6074         struct extent_record *rec;
6075         int ret = 0;
6076
6077         BUG_ON(tmpl->max_size == 0);
6078         rec = malloc(sizeof(*rec));
6079         if (!rec)
6080                 return -ENOMEM;
6081         rec->start = tmpl->start;
6082         rec->max_size = tmpl->max_size;
6083         rec->nr = max(tmpl->nr, tmpl->max_size);
6084         rec->found_rec = tmpl->found_rec;
6085         rec->content_checked = tmpl->content_checked;
6086         rec->owner_ref_checked = tmpl->owner_ref_checked;
6087         rec->num_duplicates = 0;
6088         rec->metadata = tmpl->metadata;
6089         rec->flag_block_full_backref = FLAG_UNSET;
6090         rec->bad_full_backref = 0;
6091         rec->crossing_stripes = 0;
6092         rec->wrong_chunk_type = 0;
6093         rec->is_root = tmpl->is_root;
6094         rec->refs = tmpl->refs;
6095         rec->extent_item_refs = tmpl->extent_item_refs;
6096         rec->parent_generation = tmpl->parent_generation;
6097         INIT_LIST_HEAD(&rec->backrefs);
6098         INIT_LIST_HEAD(&rec->dups);
6099         INIT_LIST_HEAD(&rec->list);
6100         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6101         rec->cache.start = tmpl->start;
6102         rec->cache.size = tmpl->nr;
6103         ret = insert_cache_extent(extent_cache, &rec->cache);
6104         if (ret) {
6105                 free(rec);
6106                 return ret;
6107         }
6108         bytes_used += rec->nr;
6109
6110         if (tmpl->metadata)
6111                 rec->crossing_stripes = check_crossing_stripes(global_info,
6112                                 rec->start, global_info->nodesize);
6113         check_extent_type(rec);
6114         return ret;
6115 }
6116
6117 /*
6118  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6119  * some are hints:
6120  * - refs              - if found, increase refs
6121  * - is_root           - if found, set
6122  * - content_checked   - if found, set
6123  * - owner_ref_checked - if found, set
6124  *
6125  * If not found, create a new one, initialize and insert.
6126  */
6127 static int add_extent_rec(struct cache_tree *extent_cache,
6128                 struct extent_record *tmpl)
6129 {
6130         struct extent_record *rec;
6131         struct cache_extent *cache;
6132         int ret = 0;
6133         int dup = 0;
6134
6135         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6136         if (cache) {
6137                 rec = container_of(cache, struct extent_record, cache);
6138                 if (tmpl->refs)
6139                         rec->refs++;
6140                 if (rec->nr == 1)
6141                         rec->nr = max(tmpl->nr, tmpl->max_size);
6142
6143                 /*
6144                  * We need to make sure to reset nr to whatever the extent
6145                  * record says was the real size, this way we can compare it to
6146                  * the backrefs.
6147                  */
6148                 if (tmpl->found_rec) {
6149                         if (tmpl->start != rec->start || rec->found_rec) {
6150                                 struct extent_record *tmp;
6151
6152                                 dup = 1;
6153                                 if (list_empty(&rec->list))
6154                                         list_add_tail(&rec->list,
6155                                                       &duplicate_extents);
6156
6157                                 /*
6158                                  * We have to do this song and dance in case we
6159                                  * find an extent record that falls inside of
6160                                  * our current extent record but does not have
6161                                  * the same objectid.
6162                                  */
6163                                 tmp = malloc(sizeof(*tmp));
6164                                 if (!tmp)
6165                                         return -ENOMEM;
6166                                 tmp->start = tmpl->start;
6167                                 tmp->max_size = tmpl->max_size;
6168                                 tmp->nr = tmpl->nr;
6169                                 tmp->found_rec = 1;
6170                                 tmp->metadata = tmpl->metadata;
6171                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6172                                 INIT_LIST_HEAD(&tmp->list);
6173                                 list_add_tail(&tmp->list, &rec->dups);
6174                                 rec->num_duplicates++;
6175                         } else {
6176                                 rec->nr = tmpl->nr;
6177                                 rec->found_rec = 1;
6178                         }
6179                 }
6180
6181                 if (tmpl->extent_item_refs && !dup) {
6182                         if (rec->extent_item_refs) {
6183                                 fprintf(stderr, "block %llu rec "
6184                                         "extent_item_refs %llu, passed %llu\n",
6185                                         (unsigned long long)tmpl->start,
6186                                         (unsigned long long)
6187                                                         rec->extent_item_refs,
6188                                         (unsigned long long)tmpl->extent_item_refs);
6189                         }
6190                         rec->extent_item_refs = tmpl->extent_item_refs;
6191                 }
6192                 if (tmpl->is_root)
6193                         rec->is_root = 1;
6194                 if (tmpl->content_checked)
6195                         rec->content_checked = 1;
6196                 if (tmpl->owner_ref_checked)
6197                         rec->owner_ref_checked = 1;
6198                 memcpy(&rec->parent_key, &tmpl->parent_key,
6199                                 sizeof(tmpl->parent_key));
6200                 if (tmpl->parent_generation)
6201                         rec->parent_generation = tmpl->parent_generation;
6202                 if (rec->max_size < tmpl->max_size)
6203                         rec->max_size = tmpl->max_size;
6204
6205                 /*
6206                  * A metadata extent can't cross stripe_len boundary, otherwise
6207                  * kernel scrub won't be able to handle it.
6208                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6209                  * it.
6210                  */
6211                 if (tmpl->metadata)
6212                         rec->crossing_stripes = check_crossing_stripes(
6213                                         global_info, rec->start,
6214                                         global_info->nodesize);
6215                 check_extent_type(rec);
6216                 maybe_free_extent_rec(extent_cache, rec);
6217                 return ret;
6218         }
6219
6220         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6221
6222         return ret;
6223 }
6224
6225 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6226                             u64 parent, u64 root, int found_ref)
6227 {
6228         struct extent_record *rec;
6229         struct tree_backref *back;
6230         struct cache_extent *cache;
6231         int ret;
6232
6233         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6234         if (!cache) {
6235                 struct extent_record tmpl;
6236
6237                 memset(&tmpl, 0, sizeof(tmpl));
6238                 tmpl.start = bytenr;
6239                 tmpl.nr = 1;
6240                 tmpl.metadata = 1;
6241                 tmpl.max_size = 1;
6242
6243                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6244                 if (ret)
6245                         return ret;
6246
6247                 /* really a bug in cache_extent implement now */
6248                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6249                 if (!cache)
6250                         return -ENOENT;
6251         }
6252
6253         rec = container_of(cache, struct extent_record, cache);
6254         if (rec->start != bytenr) {
6255                 /*
6256                  * Several cause, from unaligned bytenr to over lapping extents
6257                  */
6258                 return -EEXIST;
6259         }
6260
6261         back = find_tree_backref(rec, parent, root);
6262         if (!back) {
6263                 back = alloc_tree_backref(rec, parent, root);
6264                 if (!back)
6265                         return -ENOMEM;
6266         }
6267
6268         if (found_ref) {
6269                 if (back->node.found_ref) {
6270                         fprintf(stderr, "Extent back ref already exists "
6271                                 "for %llu parent %llu root %llu \n",
6272                                 (unsigned long long)bytenr,
6273                                 (unsigned long long)parent,
6274                                 (unsigned long long)root);
6275                 }
6276                 back->node.found_ref = 1;
6277         } else {
6278                 if (back->node.found_extent_tree) {
6279                         fprintf(stderr, "Extent back ref already exists "
6280                                 "for %llu parent %llu root %llu \n",
6281                                 (unsigned long long)bytenr,
6282                                 (unsigned long long)parent,
6283                                 (unsigned long long)root);
6284                 }
6285                 back->node.found_extent_tree = 1;
6286         }
6287         check_extent_type(rec);
6288         maybe_free_extent_rec(extent_cache, rec);
6289         return 0;
6290 }
6291
6292 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6293                             u64 parent, u64 root, u64 owner, u64 offset,
6294                             u32 num_refs, int found_ref, u64 max_size)
6295 {
6296         struct extent_record *rec;
6297         struct data_backref *back;
6298         struct cache_extent *cache;
6299         int ret;
6300
6301         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6302         if (!cache) {
6303                 struct extent_record tmpl;
6304
6305                 memset(&tmpl, 0, sizeof(tmpl));
6306                 tmpl.start = bytenr;
6307                 tmpl.nr = 1;
6308                 tmpl.max_size = max_size;
6309
6310                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6311                 if (ret)
6312                         return ret;
6313
6314                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6315                 if (!cache)
6316                         abort();
6317         }
6318
6319         rec = container_of(cache, struct extent_record, cache);
6320         if (rec->max_size < max_size)
6321                 rec->max_size = max_size;
6322
6323         /*
6324          * If found_ref is set then max_size is the real size and must match the
6325          * existing refs.  So if we have already found a ref then we need to
6326          * make sure that this ref matches the existing one, otherwise we need
6327          * to add a new backref so we can notice that the backrefs don't match
6328          * and we need to figure out who is telling the truth.  This is to
6329          * account for that awful fsync bug I introduced where we'd end up with
6330          * a btrfs_file_extent_item that would have its length include multiple
6331          * prealloc extents or point inside of a prealloc extent.
6332          */
6333         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6334                                  bytenr, max_size);
6335         if (!back) {
6336                 back = alloc_data_backref(rec, parent, root, owner, offset,
6337                                           max_size);
6338                 BUG_ON(!back);
6339         }
6340
6341         if (found_ref) {
6342                 BUG_ON(num_refs != 1);
6343                 if (back->node.found_ref)
6344                         BUG_ON(back->bytes != max_size);
6345                 back->node.found_ref = 1;
6346                 back->found_ref += 1;
6347                 back->bytes = max_size;
6348                 back->disk_bytenr = bytenr;
6349                 rec->refs += 1;
6350                 rec->content_checked = 1;
6351                 rec->owner_ref_checked = 1;
6352         } else {
6353                 if (back->node.found_extent_tree) {
6354                         fprintf(stderr, "Extent back ref already exists "
6355                                 "for %llu parent %llu root %llu "
6356                                 "owner %llu offset %llu num_refs %lu\n",
6357                                 (unsigned long long)bytenr,
6358                                 (unsigned long long)parent,
6359                                 (unsigned long long)root,
6360                                 (unsigned long long)owner,
6361                                 (unsigned long long)offset,
6362                                 (unsigned long)num_refs);
6363                 }
6364                 back->num_refs = num_refs;
6365                 back->node.found_extent_tree = 1;
6366         }
6367         maybe_free_extent_rec(extent_cache, rec);
6368         return 0;
6369 }
6370
6371 static int add_pending(struct cache_tree *pending,
6372                        struct cache_tree *seen, u64 bytenr, u32 size)
6373 {
6374         int ret;
6375         ret = add_cache_extent(seen, bytenr, size);
6376         if (ret)
6377                 return ret;
6378         add_cache_extent(pending, bytenr, size);
6379         return 0;
6380 }
6381
6382 static int pick_next_pending(struct cache_tree *pending,
6383                         struct cache_tree *reada,
6384                         struct cache_tree *nodes,
6385                         u64 last, struct block_info *bits, int bits_nr,
6386                         int *reada_bits)
6387 {
6388         unsigned long node_start = last;
6389         struct cache_extent *cache;
6390         int ret;
6391
6392         cache = search_cache_extent(reada, 0);
6393         if (cache) {
6394                 bits[0].start = cache->start;
6395                 bits[0].size = cache->size;
6396                 *reada_bits = 1;
6397                 return 1;
6398         }
6399         *reada_bits = 0;
6400         if (node_start > 32768)
6401                 node_start -= 32768;
6402
6403         cache = search_cache_extent(nodes, node_start);
6404         if (!cache)
6405                 cache = search_cache_extent(nodes, 0);
6406
6407         if (!cache) {
6408                  cache = search_cache_extent(pending, 0);
6409                  if (!cache)
6410                          return 0;
6411                  ret = 0;
6412                  do {
6413                          bits[ret].start = cache->start;
6414                          bits[ret].size = cache->size;
6415                          cache = next_cache_extent(cache);
6416                          ret++;
6417                  } while (cache && ret < bits_nr);
6418                  return ret;
6419         }
6420
6421         ret = 0;
6422         do {
6423                 bits[ret].start = cache->start;
6424                 bits[ret].size = cache->size;
6425                 cache = next_cache_extent(cache);
6426                 ret++;
6427         } while (cache && ret < bits_nr);
6428
6429         if (bits_nr - ret > 8) {
6430                 u64 lookup = bits[0].start + bits[0].size;
6431                 struct cache_extent *next;
6432                 next = search_cache_extent(pending, lookup);
6433                 while(next) {
6434                         if (next->start - lookup > 32768)
6435                                 break;
6436                         bits[ret].start = next->start;
6437                         bits[ret].size = next->size;
6438                         lookup = next->start + next->size;
6439                         ret++;
6440                         if (ret == bits_nr)
6441                                 break;
6442                         next = next_cache_extent(next);
6443                         if (!next)
6444                                 break;
6445                 }
6446         }
6447         return ret;
6448 }
6449
6450 static void free_chunk_record(struct cache_extent *cache)
6451 {
6452         struct chunk_record *rec;
6453
6454         rec = container_of(cache, struct chunk_record, cache);
6455         list_del_init(&rec->list);
6456         list_del_init(&rec->dextents);
6457         free(rec);
6458 }
6459
6460 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6461 {
6462         cache_tree_free_extents(chunk_cache, free_chunk_record);
6463 }
6464
6465 static void free_device_record(struct rb_node *node)
6466 {
6467         struct device_record *rec;
6468
6469         rec = container_of(node, struct device_record, node);
6470         free(rec);
6471 }
6472
6473 FREE_RB_BASED_TREE(device_cache, free_device_record);
6474
6475 int insert_block_group_record(struct block_group_tree *tree,
6476                               struct block_group_record *bg_rec)
6477 {
6478         int ret;
6479
6480         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6481         if (ret)
6482                 return ret;
6483
6484         list_add_tail(&bg_rec->list, &tree->block_groups);
6485         return 0;
6486 }
6487
6488 static void free_block_group_record(struct cache_extent *cache)
6489 {
6490         struct block_group_record *rec;
6491
6492         rec = container_of(cache, struct block_group_record, cache);
6493         list_del_init(&rec->list);
6494         free(rec);
6495 }
6496
6497 void free_block_group_tree(struct block_group_tree *tree)
6498 {
6499         cache_tree_free_extents(&tree->tree, free_block_group_record);
6500 }
6501
6502 int insert_device_extent_record(struct device_extent_tree *tree,
6503                                 struct device_extent_record *de_rec)
6504 {
6505         int ret;
6506
6507         /*
6508          * Device extent is a bit different from the other extents, because
6509          * the extents which belong to the different devices may have the
6510          * same start and size, so we need use the special extent cache
6511          * search/insert functions.
6512          */
6513         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6514         if (ret)
6515                 return ret;
6516
6517         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6518         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6519         return 0;
6520 }
6521
6522 static void free_device_extent_record(struct cache_extent *cache)
6523 {
6524         struct device_extent_record *rec;
6525
6526         rec = container_of(cache, struct device_extent_record, cache);
6527         if (!list_empty(&rec->chunk_list))
6528                 list_del_init(&rec->chunk_list);
6529         if (!list_empty(&rec->device_list))
6530                 list_del_init(&rec->device_list);
6531         free(rec);
6532 }
6533
6534 void free_device_extent_tree(struct device_extent_tree *tree)
6535 {
6536         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6537 }
6538
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Process a v0 extent ref item at @slot of @leaf.
 *
 * The key's objectid is the extent bytenr and its offset is passed on as the
 * parent. Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded
 * as tree backrefs, all others as data backrefs carrying the ref count of the
 * item.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	/*
	 * NOTE(review): max_size is passed as 0 here; if no extent record
	 * exists yet for this bytenr, add_data_backref() creates one with
	 * that max_size - confirm this cannot happen for v0 items.
	 */
	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6559
6560 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6561                                             struct btrfs_key *key,
6562                                             int slot)
6563 {
6564         struct btrfs_chunk *ptr;
6565         struct chunk_record *rec;
6566         int num_stripes, i;
6567
6568         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6569         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6570
6571         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6572         if (!rec) {
6573                 fprintf(stderr, "memory allocation failed\n");
6574                 exit(-1);
6575         }
6576
6577         INIT_LIST_HEAD(&rec->list);
6578         INIT_LIST_HEAD(&rec->dextents);
6579         rec->bg_rec = NULL;
6580
6581         rec->cache.start = key->offset;
6582         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6583
6584         rec->generation = btrfs_header_generation(leaf);
6585
6586         rec->objectid = key->objectid;
6587         rec->type = key->type;
6588         rec->offset = key->offset;
6589
6590         rec->length = rec->cache.size;
6591         rec->owner = btrfs_chunk_owner(leaf, ptr);
6592         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6593         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6594         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6595         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6596         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6597         rec->num_stripes = num_stripes;
6598         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6599
6600         for (i = 0; i < rec->num_stripes; ++i) {
6601                 rec->stripes[i].devid =
6602                         btrfs_stripe_devid_nr(leaf, ptr, i);
6603                 rec->stripes[i].offset =
6604                         btrfs_stripe_offset_nr(leaf, ptr, i);
6605                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6606                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6607                                 BTRFS_UUID_SIZE);
6608         }
6609
6610         return rec;
6611 }
6612
6613 static int process_chunk_item(struct cache_tree *chunk_cache,
6614                               struct btrfs_key *key, struct extent_buffer *eb,
6615                               int slot)
6616 {
6617         struct chunk_record *rec;
6618         struct btrfs_chunk *chunk;
6619         int ret = 0;
6620
6621         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6622         /*
6623          * Do extra check for this chunk item,
6624          *
6625          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6626          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6627          * and owner<->key_type check.
6628          */
6629         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6630                                       key->offset);
6631         if (ret < 0) {
6632                 error("chunk(%llu, %llu) is not valid, ignore it",
6633                       key->offset, btrfs_chunk_length(eb, chunk));
6634                 return 0;
6635         }
6636         rec = btrfs_new_chunk_record(eb, key, slot);
6637         ret = insert_cache_extent(chunk_cache, &rec->cache);
6638         if (ret) {
6639                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6640                         rec->offset, rec->length);
6641                 free(rec);
6642         }
6643
6644         return ret;
6645 }
6646
6647 static int process_device_item(struct rb_root *dev_cache,
6648                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6649 {
6650         struct btrfs_dev_item *ptr;
6651         struct device_record *rec;
6652         int ret = 0;
6653
6654         ptr = btrfs_item_ptr(eb,
6655                 slot, struct btrfs_dev_item);
6656
6657         rec = malloc(sizeof(*rec));
6658         if (!rec) {
6659                 fprintf(stderr, "memory allocation failed\n");
6660                 return -ENOMEM;
6661         }
6662
6663         rec->devid = key->offset;
6664         rec->generation = btrfs_header_generation(eb);
6665
6666         rec->objectid = key->objectid;
6667         rec->type = key->type;
6668         rec->offset = key->offset;
6669
6670         rec->devid = btrfs_device_id(eb, ptr);
6671         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6672         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6673
6674         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6675         if (ret) {
6676                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6677                 free(rec);
6678         }
6679
6680         return ret;
6681 }
6682
6683 struct block_group_record *
6684 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6685                              int slot)
6686 {
6687         struct btrfs_block_group_item *ptr;
6688         struct block_group_record *rec;
6689
6690         rec = calloc(1, sizeof(*rec));
6691         if (!rec) {
6692                 fprintf(stderr, "memory allocation failed\n");
6693                 exit(-1);
6694         }
6695
6696         rec->cache.start = key->objectid;
6697         rec->cache.size = key->offset;
6698
6699         rec->generation = btrfs_header_generation(leaf);
6700
6701         rec->objectid = key->objectid;
6702         rec->type = key->type;
6703         rec->offset = key->offset;
6704
6705         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6706         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6707
6708         INIT_LIST_HEAD(&rec->list);
6709
6710         return rec;
6711 }
6712
6713 static int process_block_group_item(struct block_group_tree *block_group_cache,
6714                                     struct btrfs_key *key,
6715                                     struct extent_buffer *eb, int slot)
6716 {
6717         struct block_group_record *rec;
6718         int ret = 0;
6719
6720         rec = btrfs_new_block_group_record(eb, key, slot);
6721         ret = insert_block_group_record(block_group_cache, rec);
6722         if (ret) {
6723                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6724                         rec->objectid, rec->offset);
6725                 free(rec);
6726         }
6727
6728         return ret;
6729 }
6730
6731 struct device_extent_record *
6732 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6733                                struct btrfs_key *key, int slot)
6734 {
6735         struct device_extent_record *rec;
6736         struct btrfs_dev_extent *ptr;
6737
6738         rec = calloc(1, sizeof(*rec));
6739         if (!rec) {
6740                 fprintf(stderr, "memory allocation failed\n");
6741                 exit(-1);
6742         }
6743
6744         rec->cache.objectid = key->objectid;
6745         rec->cache.start = key->offset;
6746
6747         rec->generation = btrfs_header_generation(leaf);
6748
6749         rec->objectid = key->objectid;
6750         rec->type = key->type;
6751         rec->offset = key->offset;
6752
6753         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6754         rec->chunk_objecteid =
6755                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6756         rec->chunk_offset =
6757                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6758         rec->length = btrfs_dev_extent_length(leaf, ptr);
6759         rec->cache.size = rec->length;
6760
6761         INIT_LIST_HEAD(&rec->chunk_list);
6762         INIT_LIST_HEAD(&rec->device_list);
6763
6764         return rec;
6765 }
6766
6767 static int
6768 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6769                            struct btrfs_key *key, struct extent_buffer *eb,
6770                            int slot)
6771 {
6772         struct device_extent_record *rec;
6773         int ret;
6774
6775         rec = btrfs_new_device_extent_record(eb, key, slot);
6776         ret = insert_device_extent_record(dev_extent_cache, rec);
6777         if (ret) {
6778                 fprintf(stderr,
6779                         "Device extent[%llu, %llu, %llu] existed.\n",
6780                         rec->objectid, rec->offset, rec->length);
6781                 free(rec);
6782         }
6783
6784         return ret;
6785 }
6786
6787 static int process_extent_item(struct btrfs_root *root,
6788                                struct cache_tree *extent_cache,
6789                                struct extent_buffer *eb, int slot)
6790 {
6791         struct btrfs_extent_item *ei;
6792         struct btrfs_extent_inline_ref *iref;
6793         struct btrfs_extent_data_ref *dref;
6794         struct btrfs_shared_data_ref *sref;
6795         struct btrfs_key key;
6796         struct extent_record tmpl;
6797         unsigned long end;
6798         unsigned long ptr;
6799         int ret;
6800         int type;
6801         u32 item_size = btrfs_item_size_nr(eb, slot);
6802         u64 refs = 0;
6803         u64 offset;
6804         u64 num_bytes;
6805         int metadata = 0;
6806
6807         btrfs_item_key_to_cpu(eb, &key, slot);
6808
6809         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6810                 metadata = 1;
6811                 num_bytes = root->fs_info->nodesize;
6812         } else {
6813                 num_bytes = key.offset;
6814         }
6815
6816         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6817                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6818                       key.objectid, root->fs_info->sectorsize);
6819                 return -EIO;
6820         }
6821         if (item_size < sizeof(*ei)) {
6822 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6823                 struct btrfs_extent_item_v0 *ei0;
6824                 BUG_ON(item_size != sizeof(*ei0));
6825                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6826                 refs = btrfs_extent_refs_v0(eb, ei0);
6827 #else
6828                 BUG();
6829 #endif
6830                 memset(&tmpl, 0, sizeof(tmpl));
6831                 tmpl.start = key.objectid;
6832                 tmpl.nr = num_bytes;
6833                 tmpl.extent_item_refs = refs;
6834                 tmpl.metadata = metadata;
6835                 tmpl.found_rec = 1;
6836                 tmpl.max_size = num_bytes;
6837
6838                 return add_extent_rec(extent_cache, &tmpl);
6839         }
6840
6841         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6842         refs = btrfs_extent_refs(eb, ei);
6843         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6844                 metadata = 1;
6845         else
6846                 metadata = 0;
6847         if (metadata && num_bytes != root->fs_info->nodesize) {
6848                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6849                       num_bytes, root->fs_info->nodesize);
6850                 return -EIO;
6851         }
6852         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6853                 error("ignore invalid data extent, length %llu is not aligned to %u",
6854                       num_bytes, root->fs_info->sectorsize);
6855                 return -EIO;
6856         }
6857
6858         memset(&tmpl, 0, sizeof(tmpl));
6859         tmpl.start = key.objectid;
6860         tmpl.nr = num_bytes;
6861         tmpl.extent_item_refs = refs;
6862         tmpl.metadata = metadata;
6863         tmpl.found_rec = 1;
6864         tmpl.max_size = num_bytes;
6865         add_extent_rec(extent_cache, &tmpl);
6866
6867         ptr = (unsigned long)(ei + 1);
6868         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6869             key.type == BTRFS_EXTENT_ITEM_KEY)
6870                 ptr += sizeof(struct btrfs_tree_block_info);
6871
6872         end = (unsigned long)ei + item_size;
6873         while (ptr < end) {
6874                 iref = (struct btrfs_extent_inline_ref *)ptr;
6875                 type = btrfs_extent_inline_ref_type(eb, iref);
6876                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6877                 switch (type) {
6878                 case BTRFS_TREE_BLOCK_REF_KEY:
6879                         ret = add_tree_backref(extent_cache, key.objectid,
6880                                         0, offset, 0);
6881                         if (ret < 0)
6882                                 error(
6883                         "add_tree_backref failed (extent items tree block): %s",
6884                                       strerror(-ret));
6885                         break;
6886                 case BTRFS_SHARED_BLOCK_REF_KEY:
6887                         ret = add_tree_backref(extent_cache, key.objectid,
6888                                         offset, 0, 0);
6889                         if (ret < 0)
6890                                 error(
6891                         "add_tree_backref failed (extent items shared block): %s",
6892                                       strerror(-ret));
6893                         break;
6894                 case BTRFS_EXTENT_DATA_REF_KEY:
6895                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6896                         add_data_backref(extent_cache, key.objectid, 0,
6897                                         btrfs_extent_data_ref_root(eb, dref),
6898                                         btrfs_extent_data_ref_objectid(eb,
6899                                                                        dref),
6900                                         btrfs_extent_data_ref_offset(eb, dref),
6901                                         btrfs_extent_data_ref_count(eb, dref),
6902                                         0, num_bytes);
6903                         break;
6904                 case BTRFS_SHARED_DATA_REF_KEY:
6905                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6906                         add_data_backref(extent_cache, key.objectid, offset,
6907                                         0, 0, 0,
6908                                         btrfs_shared_data_ref_count(eb, sref),
6909                                         0, num_bytes);
6910                         break;
6911                 default:
6912                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6913                                 key.objectid, key.type, num_bytes);
6914                         goto out;
6915                 }
6916                 ptr += btrfs_extent_inline_ref_size(type);
6917         }
6918         WARN_ON(ptr > end);
6919 out:
6920         return 0;
6921 }
6922
6923 static int check_cache_range(struct btrfs_root *root,
6924                              struct btrfs_block_group_cache *cache,
6925                              u64 offset, u64 bytes)
6926 {
6927         struct btrfs_free_space *entry;
6928         u64 *logical;
6929         u64 bytenr;
6930         int stripe_len;
6931         int i, nr, ret;
6932
6933         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6934                 bytenr = btrfs_sb_offset(i);
6935                 ret = btrfs_rmap_block(root->fs_info,
6936                                        cache->key.objectid, bytenr, 0,
6937                                        &logical, &nr, &stripe_len);
6938                 if (ret)
6939                         return ret;
6940
6941                 while (nr--) {
6942                         if (logical[nr] + stripe_len <= offset)
6943                                 continue;
6944                         if (offset + bytes <= logical[nr])
6945                                 continue;
6946                         if (logical[nr] == offset) {
6947                                 if (stripe_len >= bytes) {
6948                                         free(logical);
6949                                         return 0;
6950                                 }
6951                                 bytes -= stripe_len;
6952                                 offset += stripe_len;
6953                         } else if (logical[nr] < offset) {
6954                                 if (logical[nr] + stripe_len >=
6955                                     offset + bytes) {
6956                                         free(logical);
6957                                         return 0;
6958                                 }
6959                                 bytes = (offset + bytes) -
6960                                         (logical[nr] + stripe_len);
6961                                 offset = logical[nr] + stripe_len;
6962                         } else {
6963                                 /*
6964                                  * Could be tricky, the super may land in the
6965                                  * middle of the area we're checking.  First
6966                                  * check the easiest case, it's at the end.
6967                                  */
6968                                 if (logical[nr] + stripe_len >=
6969                                     bytes + offset) {
6970                                         bytes = logical[nr] - offset;
6971                                         continue;
6972                                 }
6973
6974                                 /* Check the left side */
6975                                 ret = check_cache_range(root, cache,
6976                                                         offset,
6977                                                         logical[nr] - offset);
6978                                 if (ret) {
6979                                         free(logical);
6980                                         return ret;
6981                                 }
6982
6983                                 /* Now we continue with the right side */
6984                                 bytes = (offset + bytes) -
6985                                         (logical[nr] + stripe_len);
6986                                 offset = logical[nr] + stripe_len;
6987                         }
6988                 }
6989
6990                 free(logical);
6991         }
6992
6993         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6994         if (!entry) {
6995                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6996                         offset, offset+bytes);
6997                 return -EINVAL;
6998         }
6999
7000         if (entry->offset != offset) {
7001                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7002                         entry->offset);
7003                 return -EINVAL;
7004         }
7005
7006         if (entry->bytes != bytes) {
7007                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7008                         bytes, entry->bytes, offset);
7009                 return -EINVAL;
7010         }
7011
7012         unlink_free_space(cache->free_space_ctl, entry);
7013         free(entry);
7014         return 0;
7015 }
7016
7017 static int verify_space_cache(struct btrfs_root *root,
7018                               struct btrfs_block_group_cache *cache)
7019 {
7020         struct btrfs_path path;
7021         struct extent_buffer *leaf;
7022         struct btrfs_key key;
7023         u64 last;
7024         int ret = 0;
7025
7026         root = root->fs_info->extent_root;
7027
7028         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7029
7030         btrfs_init_path(&path);
7031         key.objectid = last;
7032         key.offset = 0;
7033         key.type = BTRFS_EXTENT_ITEM_KEY;
7034         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7035         if (ret < 0)
7036                 goto out;
7037         ret = 0;
7038         while (1) {
7039                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7040                         ret = btrfs_next_leaf(root, &path);
7041                         if (ret < 0)
7042                                 goto out;
7043                         if (ret > 0) {
7044                                 ret = 0;
7045                                 break;
7046                         }
7047                 }
7048                 leaf = path.nodes[0];
7049                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7050                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7051                         break;
7052                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7053                     key.type != BTRFS_METADATA_ITEM_KEY) {
7054                         path.slots[0]++;
7055                         continue;
7056                 }
7057
7058                 if (last == key.objectid) {
7059                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7060                                 last = key.objectid + key.offset;
7061                         else
7062                                 last = key.objectid + root->fs_info->nodesize;
7063                         path.slots[0]++;
7064                         continue;
7065                 }
7066
7067                 ret = check_cache_range(root, cache, last,
7068                                         key.objectid - last);
7069                 if (ret)
7070                         break;
7071                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7072                         last = key.objectid + key.offset;
7073                 else
7074                         last = key.objectid + root->fs_info->nodesize;
7075                 path.slots[0]++;
7076         }
7077
7078         if (last < cache->key.objectid + cache->key.offset)
7079                 ret = check_cache_range(root, cache, last,
7080                                         cache->key.objectid +
7081                                         cache->key.offset - last);
7082
7083 out:
7084         btrfs_release_path(&path);
7085
7086         if (!ret &&
7087             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7088                 fprintf(stderr, "There are still entries left in the space "
7089                         "cache\n");
7090                 ret = -EINVAL;
7091         }
7092
7093         return ret;
7094 }
7095
7096 static int check_space_cache(struct btrfs_root *root)
7097 {
7098         struct btrfs_block_group_cache *cache;
7099         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7100         int ret;
7101         int error = 0;
7102
7103         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7104             btrfs_super_generation(root->fs_info->super_copy) !=
7105             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7106                 printf("cache and super generation don't match, space cache "
7107                        "will be invalidated\n");
7108                 return 0;
7109         }
7110
7111         if (ctx.progress_enabled) {
7112                 ctx.tp = TASK_FREE_SPACE;
7113                 task_start(ctx.info);
7114         }
7115
7116         while (1) {
7117                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7118                 if (!cache)
7119                         break;
7120
7121                 start = cache->key.objectid + cache->key.offset;
7122                 if (!cache->free_space_ctl) {
7123                         if (btrfs_init_free_space_ctl(cache,
7124                                                 root->fs_info->sectorsize)) {
7125                                 ret = -ENOMEM;
7126                                 break;
7127                         }
7128                 } else {
7129                         btrfs_remove_free_space_cache(cache);
7130                 }
7131
7132                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7133                         ret = exclude_super_stripes(root, cache);
7134                         if (ret) {
7135                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7136                                         strerror(-ret));
7137                                 error++;
7138                                 continue;
7139                         }
7140                         ret = load_free_space_tree(root->fs_info, cache);
7141                         free_excluded_extents(root, cache);
7142                         if (ret < 0) {
7143                                 fprintf(stderr, "could not load free space tree: %s\n",
7144                                         strerror(-ret));
7145                                 error++;
7146                                 continue;
7147                         }
7148                         error += ret;
7149                 } else {
7150                         ret = load_free_space_cache(root->fs_info, cache);
7151                         if (!ret)
7152                                 continue;
7153                 }
7154
7155                 ret = verify_space_cache(root, cache);
7156                 if (ret) {
7157                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7158                                 cache->key.objectid);
7159                         error++;
7160                 }
7161         }
7162
7163         task_stop(ctx.info);
7164
7165         return error ? -EINVAL : 0;
7166 }
7167
7168 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7169                         u64 num_bytes, unsigned long leaf_offset,
7170                         struct extent_buffer *eb) {
7171
7172         struct btrfs_fs_info *fs_info = root->fs_info;
7173         u64 offset = 0;
7174         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7175         char *data;
7176         unsigned long csum_offset;
7177         u32 csum;
7178         u32 csum_expected;
7179         u64 read_len;
7180         u64 data_checked = 0;
7181         u64 tmp;
7182         int ret = 0;
7183         int mirror;
7184         int num_copies;
7185
7186         if (num_bytes % fs_info->sectorsize)
7187                 return -EINVAL;
7188
7189         data = malloc(num_bytes);
7190         if (!data)
7191                 return -ENOMEM;
7192
7193         while (offset < num_bytes) {
7194                 mirror = 0;
7195 again:
7196                 read_len = num_bytes - offset;
7197                 /* read as much space once a time */
7198                 ret = read_extent_data(fs_info, data + offset,
7199                                 bytenr + offset, &read_len, mirror);
7200                 if (ret)
7201                         goto out;
7202                 data_checked = 0;
7203                 /* verify every 4k data's checksum */
7204                 while (data_checked < read_len) {
7205                         csum = ~(u32)0;
7206                         tmp = offset + data_checked;
7207
7208                         csum = btrfs_csum_data((char *)data + tmp,
7209                                                csum, fs_info->sectorsize);
7210                         btrfs_csum_final(csum, (u8 *)&csum);
7211
7212                         csum_offset = leaf_offset +
7213                                  tmp / fs_info->sectorsize * csum_size;
7214                         read_extent_buffer(eb, (char *)&csum_expected,
7215                                            csum_offset, csum_size);
7216                         /* try another mirror */
7217                         if (csum != csum_expected) {
7218                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7219                                                 mirror, bytenr + tmp,
7220                                                 csum, csum_expected);
7221                                 num_copies = btrfs_num_copies(root->fs_info,
7222                                                 bytenr, num_bytes);
7223                                 if (mirror < num_copies - 1) {
7224                                         mirror += 1;
7225                                         goto again;
7226                                 }
7227                         }
7228                         data_checked += fs_info->sectorsize;
7229                 }
7230                 offset += read_len;
7231         }
7232 out:
7233         free(data);
7234         return ret;
7235 }
7236
7237 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7238                                u64 num_bytes)
7239 {
7240         struct btrfs_path path;
7241         struct extent_buffer *leaf;
7242         struct btrfs_key key;
7243         int ret;
7244
7245         btrfs_init_path(&path);
7246         key.objectid = bytenr;
7247         key.type = BTRFS_EXTENT_ITEM_KEY;
7248         key.offset = (u64)-1;
7249
7250 again:
7251         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7252                                 0, 0);
7253         if (ret < 0) {
7254                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7255                 btrfs_release_path(&path);
7256                 return ret;
7257         } else if (ret) {
7258                 if (path.slots[0] > 0) {
7259                         path.slots[0]--;
7260                 } else {
7261                         ret = btrfs_prev_leaf(root, &path);
7262                         if (ret < 0) {
7263                                 goto out;
7264                         } else if (ret > 0) {
7265                                 ret = 0;
7266                                 goto out;
7267                         }
7268                 }
7269         }
7270
7271         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7272
7273         /*
7274          * Block group items come before extent items if they have the same
7275          * bytenr, so walk back one more just in case.  Dear future traveller,
7276          * first congrats on mastering time travel.  Now if it's not too much
7277          * trouble could you go back to 2006 and tell Chris to make the
7278          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7279          * EXTENT_ITEM_KEY please?
7280          */
7281         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7282                 if (path.slots[0] > 0) {
7283                         path.slots[0]--;
7284                 } else {
7285                         ret = btrfs_prev_leaf(root, &path);
7286                         if (ret < 0) {
7287                                 goto out;
7288                         } else if (ret > 0) {
7289                                 ret = 0;
7290                                 goto out;
7291                         }
7292                 }
7293                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7294         }
7295
7296         while (num_bytes) {
7297                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7298                         ret = btrfs_next_leaf(root, &path);
7299                         if (ret < 0) {
7300                                 fprintf(stderr, "Error going to next leaf "
7301                                         "%d\n", ret);
7302                                 btrfs_release_path(&path);
7303                                 return ret;
7304                         } else if (ret) {
7305                                 break;
7306                         }
7307                 }
7308                 leaf = path.nodes[0];
7309                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7310                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7311                         path.slots[0]++;
7312                         continue;
7313                 }
7314                 if (key.objectid + key.offset < bytenr) {
7315                         path.slots[0]++;
7316                         continue;
7317                 }
7318                 if (key.objectid > bytenr + num_bytes)
7319                         break;
7320
7321                 if (key.objectid == bytenr) {
7322                         if (key.offset >= num_bytes) {
7323                                 num_bytes = 0;
7324                                 break;
7325                         }
7326                         num_bytes -= key.offset;
7327                         bytenr += key.offset;
7328                 } else if (key.objectid < bytenr) {
7329                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7330                                 num_bytes = 0;
7331                                 break;
7332                         }
7333                         num_bytes = (bytenr + num_bytes) -
7334                                 (key.objectid + key.offset);
7335                         bytenr = key.objectid + key.offset;
7336                 } else {
7337                         if (key.objectid + key.offset < bytenr + num_bytes) {
7338                                 u64 new_start = key.objectid + key.offset;
7339                                 u64 new_bytes = bytenr + num_bytes - new_start;
7340
7341                                 /*
7342                                  * Weird case, the extent is in the middle of
7343                                  * our range, we'll have to search one side
7344                                  * and then the other.  Not sure if this happens
7345                                  * in real life, but no harm in coding it up
7346                                  * anyway just in case.
7347                                  */
7348                                 btrfs_release_path(&path);
7349                                 ret = check_extent_exists(root, new_start,
7350                                                           new_bytes);
7351                                 if (ret) {
7352                                         fprintf(stderr, "Right section didn't "
7353                                                 "have a record\n");
7354                                         break;
7355                                 }
7356                                 num_bytes = key.objectid - bytenr;
7357                                 goto again;
7358                         }
7359                         num_bytes = key.objectid - bytenr;
7360                 }
7361                 path.slots[0]++;
7362         }
7363         ret = 0;
7364
7365 out:
7366         if (num_bytes && !ret) {
7367                 fprintf(stderr, "There are no extents for csum range "
7368                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7369                 ret = 1;
7370         }
7371
7372         btrfs_release_path(&path);
7373         return ret;
7374 }
7375
7376 static int check_csums(struct btrfs_root *root)
7377 {
7378         struct btrfs_path path;
7379         struct extent_buffer *leaf;
7380         struct btrfs_key key;
7381         u64 offset = 0, num_bytes = 0;
7382         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7383         int errors = 0;
7384         int ret;
7385         u64 data_len;
7386         unsigned long leaf_offset;
7387
7388         root = root->fs_info->csum_root;
7389         if (!extent_buffer_uptodate(root->node)) {
7390                 fprintf(stderr, "No valid csum tree found\n");
7391                 return -ENOENT;
7392         }
7393
7394         btrfs_init_path(&path);
7395         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7396         key.type = BTRFS_EXTENT_CSUM_KEY;
7397         key.offset = 0;
7398         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7399         if (ret < 0) {
7400                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7401                 btrfs_release_path(&path);
7402                 return ret;
7403         }
7404
7405         if (ret > 0 && path.slots[0])
7406                 path.slots[0]--;
7407         ret = 0;
7408
7409         while (1) {
7410                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7411                         ret = btrfs_next_leaf(root, &path);
7412                         if (ret < 0) {
7413                                 fprintf(stderr, "Error going to next leaf "
7414                                         "%d\n", ret);
7415                                 break;
7416                         }
7417                         if (ret)
7418                                 break;
7419                 }
7420                 leaf = path.nodes[0];
7421
7422                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7423                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7424                         path.slots[0]++;
7425                         continue;
7426                 }
7427
7428                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7429                               csum_size) * root->fs_info->sectorsize;
7430                 if (!check_data_csum)
7431                         goto skip_csum_check;
7432                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7433                 ret = check_extent_csums(root, key.offset, data_len,
7434                                          leaf_offset, leaf);
7435                 if (ret)
7436                         break;
7437 skip_csum_check:
7438                 if (!num_bytes) {
7439                         offset = key.offset;
7440                 } else if (key.offset != offset + num_bytes) {
7441                         ret = check_extent_exists(root, offset, num_bytes);
7442                         if (ret) {
7443                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7444                                         "there is no extent record\n",
7445                                         offset, offset+num_bytes);
7446                                 errors++;
7447                         }
7448                         offset = key.offset;
7449                         num_bytes = 0;
7450                 }
7451                 num_bytes += data_len;
7452                 path.slots[0]++;
7453         }
7454
7455         btrfs_release_path(&path);
7456         return errors;
7457 }
7458
7459 static int is_dropped_key(struct btrfs_key *key,
7460                           struct btrfs_key *drop_key) {
7461         if (key->objectid < drop_key->objectid)
7462                 return 1;
7463         else if (key->objectid == drop_key->objectid) {
7464                 if (key->type < drop_key->type)
7465                         return 1;
7466                 else if (key->type == drop_key->type) {
7467                         if (key->offset < drop_key->offset)
7468                                 return 1;
7469                 }
7470         }
7471         return 0;
7472 }
7473
7474 /*
7475  * Here are the rules for FULL_BACKREF.
7476  *
7477  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7478  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7479  *      FULL_BACKREF set.
7480  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7481  *    if it happened after the relocation occurred since we'll have dropped the
7482  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7483  *    have no real way to know for sure.
7484  *
7485  * We process the blocks one root at a time, and we start from the lowest root
7486  * objectid and go to the highest.  So we can just lookup the owner backref for
7487  * the record and if we don't find it then we know it doesn't exist and we have
7488  * a FULL BACKREF.
7489  *
7490  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7491  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7492  * be set or not and then we can check later once we've gathered all the refs.
7493  */
7494 static int calc_extent_flag(struct cache_tree *extent_cache,
7495                            struct extent_buffer *buf,
7496                            struct root_item_record *ri,
7497                            u64 *flags)
7498 {
7499         struct extent_record *rec;
7500         struct cache_extent *cache;
7501         struct tree_backref *tback;
7502         u64 owner = 0;
7503
7504         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7505         /* we have added this extent before */
7506         if (!cache)
7507                 return -ENOENT;
7508
7509         rec = container_of(cache, struct extent_record, cache);
7510
7511         /*
7512          * Except file/reloc tree, we can not have
7513          * FULL BACKREF MODE
7514          */
7515         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7516                 goto normal;
7517         /*
7518          * root node
7519          */
7520         if (buf->start == ri->bytenr)
7521                 goto normal;
7522
7523         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7524                 goto full_backref;
7525
7526         owner = btrfs_header_owner(buf);
7527         if (owner == ri->objectid)
7528                 goto normal;
7529
7530         tback = find_tree_backref(rec, 0, owner);
7531         if (!tback)
7532                 goto full_backref;
7533 normal:
7534         *flags = 0;
7535         if (rec->flag_block_full_backref != FLAG_UNSET &&
7536             rec->flag_block_full_backref != 0)
7537                 rec->bad_full_backref = 1;
7538         return 0;
7539 full_backref:
7540         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7541         if (rec->flag_block_full_backref != FLAG_UNSET &&
7542             rec->flag_block_full_backref != 1)
7543                 rec->bad_full_backref = 1;
7544         return 0;
7545 }
7546
7547 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7548 {
7549         fprintf(stderr, "Invalid key type(");
7550         print_key_type(stderr, 0, key_type);
7551         fprintf(stderr, ") found in root(");
7552         print_objectid(stderr, rootid, 0);
7553         fprintf(stderr, ")\n");
7554 }
7555
7556 /*
7557  * Check if the key is valid with its extent buffer.
7558  *
7559  * This is a early check in case invalid key exists in a extent buffer
7560  * This is not comprehensive yet, but should prevent wrong key/item passed
7561  * further
7562  */
7563 static int check_type_with_root(u64 rootid, u8 key_type)
7564 {
7565         switch (key_type) {
7566         /* Only valid in chunk tree */
7567         case BTRFS_DEV_ITEM_KEY:
7568         case BTRFS_CHUNK_ITEM_KEY:
7569                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7570                         goto err;
7571                 break;
7572         /* valid in csum and log tree */
7573         case BTRFS_CSUM_TREE_OBJECTID:
7574                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7575                       is_fstree(rootid)))
7576                         goto err;
7577                 break;
7578         case BTRFS_EXTENT_ITEM_KEY:
7579         case BTRFS_METADATA_ITEM_KEY:
7580         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7581                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7582                         goto err;
7583                 break;
7584         case BTRFS_ROOT_ITEM_KEY:
7585                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7586                         goto err;
7587                 break;
7588         case BTRFS_DEV_EXTENT_KEY:
7589                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7590                         goto err;
7591                 break;
7592         }
7593         return 0;
7594 err:
7595         report_mismatch_key_root(key_type, rootid);
7596         return -EINVAL;
7597 }
7598
7599 static int run_next_block(struct btrfs_root *root,
7600                           struct block_info *bits,
7601                           int bits_nr,
7602                           u64 *last,
7603                           struct cache_tree *pending,
7604                           struct cache_tree *seen,
7605                           struct cache_tree *reada,
7606                           struct cache_tree *nodes,
7607                           struct cache_tree *extent_cache,
7608                           struct cache_tree *chunk_cache,
7609                           struct rb_root *dev_cache,
7610                           struct block_group_tree *block_group_cache,
7611                           struct device_extent_tree *dev_extent_cache,
7612                           struct root_item_record *ri)
7613 {
7614         struct btrfs_fs_info *fs_info = root->fs_info;
7615         struct extent_buffer *buf;
7616         struct extent_record *rec = NULL;
7617         u64 bytenr;
7618         u32 size;
7619         u64 parent;
7620         u64 owner;
7621         u64 flags;
7622         u64 ptr;
7623         u64 gen = 0;
7624         int ret = 0;
7625         int i;
7626         int nritems;
7627         struct btrfs_key key;
7628         struct cache_extent *cache;
7629         int reada_bits;
7630
7631         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7632                                     bits_nr, &reada_bits);
7633         if (nritems == 0)
7634                 return 1;
7635
7636         if (!reada_bits) {
7637                 for(i = 0; i < nritems; i++) {
7638                         ret = add_cache_extent(reada, bits[i].start,
7639                                                bits[i].size);
7640                         if (ret == -EEXIST)
7641                                 continue;
7642
7643                         /* fixme, get the parent transid */
7644                         readahead_tree_block(fs_info, bits[i].start,
7645                                              bits[i].size, 0);
7646                 }
7647         }
7648         *last = bits[0].start;
7649         bytenr = bits[0].start;
7650         size = bits[0].size;
7651
7652         cache = lookup_cache_extent(pending, bytenr, size);
7653         if (cache) {
7654                 remove_cache_extent(pending, cache);
7655                 free(cache);
7656         }
7657         cache = lookup_cache_extent(reada, bytenr, size);
7658         if (cache) {
7659                 remove_cache_extent(reada, cache);
7660                 free(cache);
7661         }
7662         cache = lookup_cache_extent(nodes, bytenr, size);
7663         if (cache) {
7664                 remove_cache_extent(nodes, cache);
7665                 free(cache);
7666         }
7667         cache = lookup_cache_extent(extent_cache, bytenr, size);
7668         if (cache) {
7669                 rec = container_of(cache, struct extent_record, cache);
7670                 gen = rec->parent_generation;
7671         }
7672
7673         /* fixme, get the real parent transid */
7674         buf = read_tree_block(root->fs_info, bytenr, size, gen);
7675         if (!extent_buffer_uptodate(buf)) {
7676                 record_bad_block_io(root->fs_info,
7677                                     extent_cache, bytenr, size);
7678                 goto out;
7679         }
7680
7681         nritems = btrfs_header_nritems(buf);
7682
7683         flags = 0;
7684         if (!init_extent_tree) {
7685                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7686                                        btrfs_header_level(buf), 1, NULL,
7687                                        &flags);
7688                 if (ret < 0) {
7689                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7690                         if (ret < 0) {
7691                                 fprintf(stderr, "Couldn't calc extent flags\n");
7692                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7693                         }
7694                 }
7695         } else {
7696                 flags = 0;
7697                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7698                 if (ret < 0) {
7699                         fprintf(stderr, "Couldn't calc extent flags\n");
7700                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7701                 }
7702         }
7703
7704         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7705                 if (ri != NULL &&
7706                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7707                     ri->objectid == btrfs_header_owner(buf)) {
7708                         /*
7709                          * Ok we got to this block from it's original owner and
7710                          * we have FULL_BACKREF set.  Relocation can leave
7711                          * converted blocks over so this is altogether possible,
7712                          * however it's not possible if the generation > the
7713                          * last snapshot, so check for this case.
7714                          */
7715                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7716                             btrfs_header_generation(buf) > ri->last_snapshot) {
7717                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7718                                 rec->bad_full_backref = 1;
7719                         }
7720                 }
7721         } else {
7722                 if (ri != NULL &&
7723                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7724                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7725                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7726                         rec->bad_full_backref = 1;
7727                 }
7728         }
7729
7730         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7731                 rec->flag_block_full_backref = 1;
7732                 parent = bytenr;
7733                 owner = 0;
7734         } else {
7735                 rec->flag_block_full_backref = 0;
7736                 parent = 0;
7737                 owner = btrfs_header_owner(buf);
7738         }
7739
7740         ret = check_block(root, extent_cache, buf, flags);
7741         if (ret)
7742                 goto out;
7743
7744         if (btrfs_is_leaf(buf)) {
7745                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7746                 for (i = 0; i < nritems; i++) {
7747                         struct btrfs_file_extent_item *fi;
7748                         btrfs_item_key_to_cpu(buf, &key, i);
7749                         /*
7750                          * Check key type against the leaf owner.
7751                          * Could filter quite a lot of early error if
7752                          * owner is correct
7753                          */
7754                         if (check_type_with_root(btrfs_header_owner(buf),
7755                                                  key.type)) {
7756                                 fprintf(stderr, "ignoring invalid key\n");
7757                                 continue;
7758                         }
7759                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7760                                 process_extent_item(root, extent_cache, buf,
7761                                                     i);
7762                                 continue;
7763                         }
7764                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7765                                 process_extent_item(root, extent_cache, buf,
7766                                                     i);
7767                                 continue;
7768                         }
7769                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7770                                 total_csum_bytes +=
7771                                         btrfs_item_size_nr(buf, i);
7772                                 continue;
7773                         }
7774                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7775                                 process_chunk_item(chunk_cache, &key, buf, i);
7776                                 continue;
7777                         }
7778                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7779                                 process_device_item(dev_cache, &key, buf, i);
7780                                 continue;
7781                         }
7782                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7783                                 process_block_group_item(block_group_cache,
7784                                         &key, buf, i);
7785                                 continue;
7786                         }
7787                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7788                                 process_device_extent_item(dev_extent_cache,
7789                                         &key, buf, i);
7790                                 continue;
7791
7792                         }
7793                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7794 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7795                                 process_extent_ref_v0(extent_cache, buf, i);
7796 #else
7797                                 BUG();
7798 #endif
7799                                 continue;
7800                         }
7801
7802                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7803                                 ret = add_tree_backref(extent_cache,
7804                                                 key.objectid, 0, key.offset, 0);
7805                                 if (ret < 0)
7806                                         error(
7807                                 "add_tree_backref failed (leaf tree block): %s",
7808                                               strerror(-ret));
7809                                 continue;
7810                         }
7811                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7812                                 ret = add_tree_backref(extent_cache,
7813                                                 key.objectid, key.offset, 0, 0);
7814                                 if (ret < 0)
7815                                         error(
7816                                 "add_tree_backref failed (leaf shared block): %s",
7817                                               strerror(-ret));
7818                                 continue;
7819                         }
7820                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7821                                 struct btrfs_extent_data_ref *ref;
7822                                 ref = btrfs_item_ptr(buf, i,
7823                                                 struct btrfs_extent_data_ref);
7824                                 add_data_backref(extent_cache,
7825                                         key.objectid, 0,
7826                                         btrfs_extent_data_ref_root(buf, ref),
7827                                         btrfs_extent_data_ref_objectid(buf,
7828                                                                        ref),
7829                                         btrfs_extent_data_ref_offset(buf, ref),
7830                                         btrfs_extent_data_ref_count(buf, ref),
7831                                         0, root->fs_info->sectorsize);
7832                                 continue;
7833                         }
7834                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7835                                 struct btrfs_shared_data_ref *ref;
7836                                 ref = btrfs_item_ptr(buf, i,
7837                                                 struct btrfs_shared_data_ref);
7838                                 add_data_backref(extent_cache,
7839                                         key.objectid, key.offset, 0, 0, 0,
7840                                         btrfs_shared_data_ref_count(buf, ref),
7841                                         0, root->fs_info->sectorsize);
7842                                 continue;
7843                         }
7844                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7845                                 struct bad_item *bad;
7846
7847                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7848                                         continue;
7849                                 if (!owner)
7850                                         continue;
7851                                 bad = malloc(sizeof(struct bad_item));
7852                                 if (!bad)
7853                                         continue;
7854                                 INIT_LIST_HEAD(&bad->list);
7855                                 memcpy(&bad->key, &key,
7856                                        sizeof(struct btrfs_key));
7857                                 bad->root_id = owner;
7858                                 list_add_tail(&bad->list, &delete_items);
7859                                 continue;
7860                         }
7861                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7862                                 continue;
7863                         fi = btrfs_item_ptr(buf, i,
7864                                             struct btrfs_file_extent_item);
7865                         if (btrfs_file_extent_type(buf, fi) ==
7866                             BTRFS_FILE_EXTENT_INLINE)
7867                                 continue;
7868                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7869                                 continue;
7870
7871                         data_bytes_allocated +=
7872                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7873                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7874                                 abort();
7875                         }
7876                         data_bytes_referenced +=
7877                                 btrfs_file_extent_num_bytes(buf, fi);
7878                         add_data_backref(extent_cache,
7879                                 btrfs_file_extent_disk_bytenr(buf, fi),
7880                                 parent, owner, key.objectid, key.offset -
7881                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7882                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7883                 }
7884         } else {
7885                 int level;
7886                 struct btrfs_key first_key;
7887
7888                 first_key.objectid = 0;
7889
7890                 if (nritems > 0)
7891                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7892                 level = btrfs_header_level(buf);
7893                 for (i = 0; i < nritems; i++) {
7894                         struct extent_record tmpl;
7895
7896                         ptr = btrfs_node_blockptr(buf, i);
7897                         size = root->fs_info->nodesize;
7898                         btrfs_node_key_to_cpu(buf, &key, i);
7899                         if (ri != NULL) {
7900                                 if ((level == ri->drop_level)
7901                                     && is_dropped_key(&key, &ri->drop_key)) {
7902                                         continue;
7903                                 }
7904                         }
7905
7906                         memset(&tmpl, 0, sizeof(tmpl));
7907                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7908                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7909                         tmpl.start = ptr;
7910                         tmpl.nr = size;
7911                         tmpl.refs = 1;
7912                         tmpl.metadata = 1;
7913                         tmpl.max_size = size;
7914                         ret = add_extent_rec(extent_cache, &tmpl);
7915                         if (ret < 0)
7916                                 goto out;
7917
7918                         ret = add_tree_backref(extent_cache, ptr, parent,
7919                                         owner, 1);
7920                         if (ret < 0) {
7921                                 error(
7922                                 "add_tree_backref failed (non-leaf block): %s",
7923                                       strerror(-ret));
7924                                 continue;
7925                         }
7926
7927                         if (level > 1) {
7928                                 add_pending(nodes, seen, ptr, size);
7929                         } else {
7930                                 add_pending(pending, seen, ptr, size);
7931                         }
7932                 }
7933                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7934                                       nritems) * sizeof(struct btrfs_key_ptr);
7935         }
7936         total_btree_bytes += buf->len;
7937         if (fs_root_objectid(btrfs_header_owner(buf)))
7938                 total_fs_tree_bytes += buf->len;
7939         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7940                 total_extent_tree_bytes += buf->len;
7941         if (!found_old_backref &&
7942             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7943             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7944             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7945                 found_old_backref = 1;
7946 out:
7947         free_extent_buffer(buf);
7948         return ret;
7949 }
7950
7951 static int add_root_to_pending(struct extent_buffer *buf,
7952                                struct cache_tree *extent_cache,
7953                                struct cache_tree *pending,
7954                                struct cache_tree *seen,
7955                                struct cache_tree *nodes,
7956                                u64 objectid)
7957 {
7958         struct extent_record tmpl;
7959         int ret;
7960
7961         if (btrfs_header_level(buf) > 0)
7962                 add_pending(nodes, seen, buf->start, buf->len);
7963         else
7964                 add_pending(pending, seen, buf->start, buf->len);
7965
7966         memset(&tmpl, 0, sizeof(tmpl));
7967         tmpl.start = buf->start;
7968         tmpl.nr = buf->len;
7969         tmpl.is_root = 1;
7970         tmpl.refs = 1;
7971         tmpl.metadata = 1;
7972         tmpl.max_size = buf->len;
7973         add_extent_rec(extent_cache, &tmpl);
7974
7975         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7976             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7977                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7978                                 0, 1);
7979         else
7980                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7981                                 1);
7982         return ret;
7983 }
7984
7985 /* as we fix the tree, we might be deleting blocks that
7986  * we're tracking for repair.  This hook makes sure we
7987  * remove any backrefs for blocks as we are fixing them.
7988  */
7989 static int free_extent_hook(struct btrfs_trans_handle *trans,
7990                             struct btrfs_root *root,
7991                             u64 bytenr, u64 num_bytes, u64 parent,
7992                             u64 root_objectid, u64 owner, u64 offset,
7993                             int refs_to_drop)
7994 {
7995         struct extent_record *rec;
7996         struct cache_extent *cache;
7997         int is_data;
7998         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7999
8000         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8001         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8002         if (!cache)
8003                 return 0;
8004
8005         rec = container_of(cache, struct extent_record, cache);
8006         if (is_data) {
8007                 struct data_backref *back;
8008                 back = find_data_backref(rec, parent, root_objectid, owner,
8009                                          offset, 1, bytenr, num_bytes);
8010                 if (!back)
8011                         goto out;
8012                 if (back->node.found_ref) {
8013                         back->found_ref -= refs_to_drop;
8014                         if (rec->refs)
8015                                 rec->refs -= refs_to_drop;
8016                 }
8017                 if (back->node.found_extent_tree) {
8018                         back->num_refs -= refs_to_drop;
8019                         if (rec->extent_item_refs)
8020                                 rec->extent_item_refs -= refs_to_drop;
8021                 }
8022                 if (back->found_ref == 0)
8023                         back->node.found_ref = 0;
8024                 if (back->num_refs == 0)
8025                         back->node.found_extent_tree = 0;
8026
8027                 if (!back->node.found_extent_tree && back->node.found_ref) {
8028                         list_del(&back->node.list);
8029                         free(back);
8030                 }
8031         } else {
8032                 struct tree_backref *back;
8033                 back = find_tree_backref(rec, parent, root_objectid);
8034                 if (!back)
8035                         goto out;
8036                 if (back->node.found_ref) {
8037                         if (rec->refs)
8038                                 rec->refs--;
8039                         back->node.found_ref = 0;
8040                 }
8041                 if (back->node.found_extent_tree) {
8042                         if (rec->extent_item_refs)
8043                                 rec->extent_item_refs--;
8044                         back->node.found_extent_tree = 0;
8045                 }
8046                 if (!back->node.found_extent_tree && back->node.found_ref) {
8047                         list_del(&back->node.list);
8048                         free(back);
8049                 }
8050         }
8051         maybe_free_extent_rec(extent_cache, rec);
8052 out:
8053         return 0;
8054 }
8055
8056 static int delete_extent_records(struct btrfs_trans_handle *trans,
8057                                  struct btrfs_root *root,
8058                                  struct btrfs_path *path,
8059                                  u64 bytenr)
8060 {
8061         struct btrfs_key key;
8062         struct btrfs_key found_key;
8063         struct extent_buffer *leaf;
8064         int ret;
8065         int slot;
8066
8067
8068         key.objectid = bytenr;
8069         key.type = (u8)-1;
8070         key.offset = (u64)-1;
8071
8072         while(1) {
8073                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8074                                         &key, path, 0, 1);
8075                 if (ret < 0)
8076                         break;
8077
8078                 if (ret > 0) {
8079                         ret = 0;
8080                         if (path->slots[0] == 0)
8081                                 break;
8082                         path->slots[0]--;
8083                 }
8084                 ret = 0;
8085
8086                 leaf = path->nodes[0];
8087                 slot = path->slots[0];
8088
8089                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8090                 if (found_key.objectid != bytenr)
8091                         break;
8092
8093                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8094                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8095                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8096                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8097                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8098                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8099                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8100                         btrfs_release_path(path);
8101                         if (found_key.type == 0) {
8102                                 if (found_key.offset == 0)
8103                                         break;
8104                                 key.offset = found_key.offset - 1;
8105                                 key.type = found_key.type;
8106                         }
8107                         key.type = found_key.type - 1;
8108                         key.offset = (u64)-1;
8109                         continue;
8110                 }
8111
8112                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8113                         found_key.objectid, found_key.type, found_key.offset);
8114
8115                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8116                 if (ret)
8117                         break;
8118                 btrfs_release_path(path);
8119
8120                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8121                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8122                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8123                                 found_key.offset : root->fs_info->nodesize;
8124
8125                         ret = btrfs_update_block_group(trans, root, bytenr,
8126                                                        bytes, 0, 0);
8127                         if (ret)
8128                                 break;
8129                 }
8130         }
8131
8132         btrfs_release_path(path);
8133         return ret;
8134 }
8135
8136 /*
8137  * for a single backref, this will allocate a new extent
8138  * and add the backref to it.
8139  */
8140 static int record_extent(struct btrfs_trans_handle *trans,
8141                          struct btrfs_fs_info *info,
8142                          struct btrfs_path *path,
8143                          struct extent_record *rec,
8144                          struct extent_backref *back,
8145                          int allocated, u64 flags)
8146 {
8147         int ret = 0;
8148         struct btrfs_root *extent_root = info->extent_root;
8149         struct extent_buffer *leaf;
8150         struct btrfs_key ins_key;
8151         struct btrfs_extent_item *ei;
8152         struct data_backref *dback;
8153         struct btrfs_tree_block_info *bi;
8154
8155         if (!back->is_data)
8156                 rec->max_size = max_t(u64, rec->max_size,
8157                                     info->nodesize);
8158
8159         if (!allocated) {
8160                 u32 item_size = sizeof(*ei);
8161
8162                 if (!back->is_data)
8163                         item_size += sizeof(*bi);
8164
8165                 ins_key.objectid = rec->start;
8166                 ins_key.offset = rec->max_size;
8167                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8168
8169                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8170                                         &ins_key, item_size);
8171                 if (ret)
8172                         goto fail;
8173
8174                 leaf = path->nodes[0];
8175                 ei = btrfs_item_ptr(leaf, path->slots[0],
8176                                     struct btrfs_extent_item);
8177
8178                 btrfs_set_extent_refs(leaf, ei, 0);
8179                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8180
8181                 if (back->is_data) {
8182                         btrfs_set_extent_flags(leaf, ei,
8183                                                BTRFS_EXTENT_FLAG_DATA);
8184                 } else {
8185                         struct btrfs_disk_key copy_key;;
8186
8187                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8188                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8189                                              sizeof(*bi));
8190
8191                         btrfs_set_disk_key_objectid(&copy_key,
8192                                                     rec->info_objectid);
8193                         btrfs_set_disk_key_type(&copy_key, 0);
8194                         btrfs_set_disk_key_offset(&copy_key, 0);
8195
8196                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8197                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8198
8199                         btrfs_set_extent_flags(leaf, ei,
8200                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8201                 }
8202
8203                 btrfs_mark_buffer_dirty(leaf);
8204                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8205                                                rec->max_size, 1, 0);
8206                 if (ret)
8207                         goto fail;
8208                 btrfs_release_path(path);
8209         }
8210
8211         if (back->is_data) {
8212                 u64 parent;
8213                 int i;
8214
8215                 dback = to_data_backref(back);
8216                 if (back->full_backref)
8217                         parent = dback->parent;
8218                 else
8219                         parent = 0;
8220
8221                 for (i = 0; i < dback->found_ref; i++) {
8222                         /* if parent != 0, we're doing a full backref
8223                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8224                          * just makes the backref allocator create a data
8225                          * backref
8226                          */
8227                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8228                                                    rec->start, rec->max_size,
8229                                                    parent,
8230                                                    dback->root,
8231                                                    parent ?
8232                                                    BTRFS_FIRST_FREE_OBJECTID :
8233                                                    dback->owner,
8234                                                    dback->offset);
8235                         if (ret)
8236                                 break;
8237                 }
8238                 fprintf(stderr, "adding new data backref"
8239                                 " on %llu %s %llu owner %llu"
8240                                 " offset %llu found %d\n",
8241                                 (unsigned long long)rec->start,
8242                                 back->full_backref ?
8243                                 "parent" : "root",
8244                                 back->full_backref ?
8245                                 (unsigned long long)parent :
8246                                 (unsigned long long)dback->root,
8247                                 (unsigned long long)dback->owner,
8248                                 (unsigned long long)dback->offset,
8249                                 dback->found_ref);
8250         } else {
8251                 u64 parent;
8252                 struct tree_backref *tback;
8253
8254                 tback = to_tree_backref(back);
8255                 if (back->full_backref)
8256                         parent = tback->parent;
8257                 else
8258                         parent = 0;
8259
8260                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8261                                            rec->start, rec->max_size,
8262                                            parent, tback->root, 0, 0);
8263                 fprintf(stderr, "adding new tree backref on "
8264                         "start %llu len %llu parent %llu root %llu\n",
8265                         rec->start, rec->max_size, parent, tback->root);
8266         }
8267 fail:
8268         btrfs_release_path(path);
8269         return ret;
8270 }
8271
8272 static struct extent_entry *find_entry(struct list_head *entries,
8273                                        u64 bytenr, u64 bytes)
8274 {
8275         struct extent_entry *entry = NULL;
8276
8277         list_for_each_entry(entry, entries, list) {
8278                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8279                         return entry;
8280         }
8281
8282         return NULL;
8283 }
8284
8285 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8286 {
8287         struct extent_entry *entry, *best = NULL, *prev = NULL;
8288
8289         list_for_each_entry(entry, entries, list) {
8290                 /*
8291                  * If there are as many broken entries as entries then we know
8292                  * not to trust this particular entry.
8293                  */
8294                 if (entry->broken == entry->count)
8295                         continue;
8296
8297                 /*
8298                  * Special case, when there are only two entries and 'best' is
8299                  * the first one
8300                  */
8301                 if (!prev) {
8302                         best = entry;
8303                         prev = entry;
8304                         continue;
8305                 }
8306
8307                 /*
8308                  * If our current entry == best then we can't be sure our best
8309                  * is really the best, so we need to keep searching.
8310                  */
8311                 if (best && best->count == entry->count) {
8312                         prev = entry;
8313                         best = NULL;
8314                         continue;
8315                 }
8316
8317                 /* Prev == entry, not good enough, have to keep searching */
8318                 if (!prev->broken && prev->count == entry->count)
8319                         continue;
8320
8321                 if (!best)
8322                         best = (prev->count > entry->count) ? prev : entry;
8323                 else if (best->count < entry->count)
8324                         best = entry;
8325                 prev = entry;
8326         }
8327
8328         return best;
8329 }
8330
8331 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8332                       struct data_backref *dback, struct extent_entry *entry)
8333 {
8334         struct btrfs_trans_handle *trans;
8335         struct btrfs_root *root;
8336         struct btrfs_file_extent_item *fi;
8337         struct extent_buffer *leaf;
8338         struct btrfs_key key;
8339         u64 bytenr, bytes;
8340         int ret, err;
8341
8342         key.objectid = dback->root;
8343         key.type = BTRFS_ROOT_ITEM_KEY;
8344         key.offset = (u64)-1;
8345         root = btrfs_read_fs_root(info, &key);
8346         if (IS_ERR(root)) {
8347                 fprintf(stderr, "Couldn't find root for our ref\n");
8348                 return -EINVAL;
8349         }
8350
8351         /*
8352          * The backref points to the original offset of the extent if it was
8353          * split, so we need to search down to the offset we have and then walk
8354          * forward until we find the backref we're looking for.
8355          */
8356         key.objectid = dback->owner;
8357         key.type = BTRFS_EXTENT_DATA_KEY;
8358         key.offset = dback->offset;
8359         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8360         if (ret < 0) {
8361                 fprintf(stderr, "Error looking up ref %d\n", ret);
8362                 return ret;
8363         }
8364
8365         while (1) {
8366                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8367                         ret = btrfs_next_leaf(root, path);
8368                         if (ret) {
8369                                 fprintf(stderr, "Couldn't find our ref, next\n");
8370                                 return -EINVAL;
8371                         }
8372                 }
8373                 leaf = path->nodes[0];
8374                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8375                 if (key.objectid != dback->owner ||
8376                     key.type != BTRFS_EXTENT_DATA_KEY) {
8377                         fprintf(stderr, "Couldn't find our ref, search\n");
8378                         return -EINVAL;
8379                 }
8380                 fi = btrfs_item_ptr(leaf, path->slots[0],
8381                                     struct btrfs_file_extent_item);
8382                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8383                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8384
8385                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8386                         break;
8387                 path->slots[0]++;
8388         }
8389
8390         btrfs_release_path(path);
8391
8392         trans = btrfs_start_transaction(root, 1);
8393         if (IS_ERR(trans))
8394                 return PTR_ERR(trans);
8395
8396         /*
8397          * Ok we have the key of the file extent we want to fix, now we can cow
8398          * down to the thing and fix it.
8399          */
8400         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8401         if (ret < 0) {
8402                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8403                         key.objectid, key.type, key.offset, ret);
8404                 goto out;
8405         }
8406         if (ret > 0) {
8407                 fprintf(stderr, "Well that's odd, we just found this key "
8408                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8409                         key.offset);
8410                 ret = -EINVAL;
8411                 goto out;
8412         }
8413         leaf = path->nodes[0];
8414         fi = btrfs_item_ptr(leaf, path->slots[0],
8415                             struct btrfs_file_extent_item);
8416
8417         if (btrfs_file_extent_compression(leaf, fi) &&
8418             dback->disk_bytenr != entry->bytenr) {
8419                 fprintf(stderr, "Ref doesn't match the record start and is "
8420                         "compressed, please take a btrfs-image of this file "
8421                         "system and send it to a btrfs developer so they can "
8422                         "complete this functionality for bytenr %Lu\n",
8423                         dback->disk_bytenr);
8424                 ret = -EINVAL;
8425                 goto out;
8426         }
8427
8428         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8429                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8430         } else if (dback->disk_bytenr > entry->bytenr) {
8431                 u64 off_diff, offset;
8432
8433                 off_diff = dback->disk_bytenr - entry->bytenr;
8434                 offset = btrfs_file_extent_offset(leaf, fi);
8435                 if (dback->disk_bytenr + offset +
8436                     btrfs_file_extent_num_bytes(leaf, fi) >
8437                     entry->bytenr + entry->bytes) {
8438                         fprintf(stderr, "Ref is past the entry end, please "
8439                                 "take a btrfs-image of this file system and "
8440                                 "send it to a btrfs developer, ref %Lu\n",
8441                                 dback->disk_bytenr);
8442                         ret = -EINVAL;
8443                         goto out;
8444                 }
8445                 offset += off_diff;
8446                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8447                 btrfs_set_file_extent_offset(leaf, fi, offset);
8448         } else if (dback->disk_bytenr < entry->bytenr) {
8449                 u64 offset;
8450
8451                 offset = btrfs_file_extent_offset(leaf, fi);
8452                 if (dback->disk_bytenr + offset < entry->bytenr) {
8453                         fprintf(stderr, "Ref is before the entry start, please"
8454                                 " take a btrfs-image of this file system and "
8455                                 "send it to a btrfs developer, ref %Lu\n",
8456                                 dback->disk_bytenr);
8457                         ret = -EINVAL;
8458                         goto out;
8459                 }
8460
8461                 offset += dback->disk_bytenr;
8462                 offset -= entry->bytenr;
8463                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8464                 btrfs_set_file_extent_offset(leaf, fi, offset);
8465         }
8466
8467         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8468
8469         /*
8470          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8471          * only do this if we aren't using compression, otherwise it's a
8472          * trickier case.
8473          */
8474         if (!btrfs_file_extent_compression(leaf, fi))
8475                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8476         else
8477                 printf("ram bytes may be wrong?\n");
8478         btrfs_mark_buffer_dirty(leaf);
8479 out:
8480         err = btrfs_commit_transaction(trans, root);
8481         btrfs_release_path(path);
8482         return ret ? ret : err;
8483 }
8484
8485 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8486                            struct extent_record *rec)
8487 {
8488         struct extent_backref *back;
8489         struct data_backref *dback;
8490         struct extent_entry *entry, *best = NULL;
8491         LIST_HEAD(entries);
8492         int nr_entries = 0;
8493         int broken_entries = 0;
8494         int ret = 0;
8495         short mismatch = 0;
8496
8497         /*
8498          * Metadata is easy and the backrefs should always agree on bytenr and
8499          * size, if not we've got bigger issues.
8500          */
8501         if (rec->metadata)
8502                 return 0;
8503
8504         list_for_each_entry(back, &rec->backrefs, list) {
8505                 if (back->full_backref || !back->is_data)
8506                         continue;
8507
8508                 dback = to_data_backref(back);
8509
8510                 /*
8511                  * We only pay attention to backrefs that we found a real
8512                  * backref for.
8513                  */
8514                 if (dback->found_ref == 0)
8515                         continue;
8516
8517                 /*
8518                  * For now we only catch when the bytes don't match, not the
8519                  * bytenr.  We can easily do this at the same time, but I want
8520                  * to have a fs image to test on before we just add repair
8521                  * functionality willy-nilly so we know we won't screw up the
8522                  * repair.
8523                  */
8524
8525                 entry = find_entry(&entries, dback->disk_bytenr,
8526                                    dback->bytes);
8527                 if (!entry) {
8528                         entry = malloc(sizeof(struct extent_entry));
8529                         if (!entry) {
8530                                 ret = -ENOMEM;
8531                                 goto out;
8532                         }
8533                         memset(entry, 0, sizeof(*entry));
8534                         entry->bytenr = dback->disk_bytenr;
8535                         entry->bytes = dback->bytes;
8536                         list_add_tail(&entry->list, &entries);
8537                         nr_entries++;
8538                 }
8539
8540                 /*
8541                  * If we only have on entry we may think the entries agree when
8542                  * in reality they don't so we have to do some extra checking.
8543                  */
8544                 if (dback->disk_bytenr != rec->start ||
8545                     dback->bytes != rec->nr || back->broken)
8546                         mismatch = 1;
8547
8548                 if (back->broken) {
8549                         entry->broken++;
8550                         broken_entries++;
8551                 }
8552
8553                 entry->count++;
8554         }
8555
8556         /* Yay all the backrefs agree, carry on good sir */
8557         if (nr_entries <= 1 && !mismatch)
8558                 goto out;
8559
8560         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8561                 "%Lu\n", rec->start);
8562
8563         /*
8564          * First we want to see if the backrefs can agree amongst themselves who
8565          * is right, so figure out which one of the entries has the highest
8566          * count.
8567          */
8568         best = find_most_right_entry(&entries);
8569
8570         /*
8571          * Ok so we may have an even split between what the backrefs think, so
8572          * this is where we use the extent ref to see what it thinks.
8573          */
8574         if (!best) {
8575                 entry = find_entry(&entries, rec->start, rec->nr);
8576                 if (!entry && (!broken_entries || !rec->found_rec)) {
8577                         fprintf(stderr, "Backrefs don't agree with each other "
8578                                 "and extent record doesn't agree with anybody,"
8579                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8580                                 rec->start, rec->nr);
8581                         ret = -EINVAL;
8582                         goto out;
8583                 } else if (!entry) {
8584                         /*
8585                          * Ok our backrefs were broken, we'll assume this is the
8586                          * correct value and add an entry for this range.
8587                          */
8588                         entry = malloc(sizeof(struct extent_entry));
8589                         if (!entry) {
8590                                 ret = -ENOMEM;
8591                                 goto out;
8592                         }
8593                         memset(entry, 0, sizeof(*entry));
8594                         entry->bytenr = rec->start;
8595                         entry->bytes = rec->nr;
8596                         list_add_tail(&entry->list, &entries);
8597                         nr_entries++;
8598                 }
8599                 entry->count++;
8600                 best = find_most_right_entry(&entries);
8601                 if (!best) {
8602                         fprintf(stderr, "Backrefs and extent record evenly "
8603                                 "split on who is right, this is going to "
8604                                 "require user input to fix bytenr %Lu bytes "
8605                                 "%Lu\n", rec->start, rec->nr);
8606                         ret = -EINVAL;
8607                         goto out;
8608                 }
8609         }
8610
8611         /*
8612          * I don't think this can happen currently as we'll abort() if we catch
8613          * this case higher up, but in case somebody removes that we still can't
8614          * deal with it properly here yet, so just bail out of that's the case.
8615          */
8616         if (best->bytenr != rec->start) {
8617                 fprintf(stderr, "Extent start and backref starts don't match, "
8618                         "please use btrfs-image on this file system and send "
8619                         "it to a btrfs developer so they can make fsck fix "
8620                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8621                         rec->start, rec->nr);
8622                 ret = -EINVAL;
8623                 goto out;
8624         }
8625
8626         /*
8627          * Ok great we all agreed on an extent record, let's go find the real
8628          * references and fix up the ones that don't match.
8629          */
8630         list_for_each_entry(back, &rec->backrefs, list) {
8631                 if (back->full_backref || !back->is_data)
8632                         continue;
8633
8634                 dback = to_data_backref(back);
8635
8636                 /*
8637                  * Still ignoring backrefs that don't have a real ref attached
8638                  * to them.
8639                  */
8640                 if (dback->found_ref == 0)
8641                         continue;
8642
8643                 if (dback->bytes == best->bytes &&
8644                     dback->disk_bytenr == best->bytenr)
8645                         continue;
8646
8647                 ret = repair_ref(info, path, dback, best);
8648                 if (ret)
8649                         goto out;
8650         }
8651
8652         /*
8653          * Ok we messed with the actual refs, which means we need to drop our
8654          * entire cache and go back and rescan.  I know this is a huge pain and
8655          * adds a lot of extra work, but it's the only way to be safe.  Once all
8656          * the backrefs agree we may not need to do anything to the extent
8657          * record itself.
8658          */
8659         ret = -EAGAIN;
8660 out:
8661         while (!list_empty(&entries)) {
8662                 entry = list_entry(entries.next, struct extent_entry, list);
8663                 list_del_init(&entry->list);
8664                 free(entry);
8665         }
8666         return ret;
8667 }
8668
8669 static int process_duplicates(struct cache_tree *extent_cache,
8670                               struct extent_record *rec)
8671 {
8672         struct extent_record *good, *tmp;
8673         struct cache_extent *cache;
8674         int ret;
8675
8676         /*
8677          * If we found a extent record for this extent then return, or if we
8678          * have more than one duplicate we are likely going to need to delete
8679          * something.
8680          */
8681         if (rec->found_rec || rec->num_duplicates > 1)
8682                 return 0;
8683
8684         /* Shouldn't happen but just in case */
8685         BUG_ON(!rec->num_duplicates);
8686
8687         /*
8688          * So this happens if we end up with a backref that doesn't match the
8689          * actual extent entry.  So either the backref is bad or the extent
8690          * entry is bad.  Either way we want to have the extent_record actually
8691          * reflect what we found in the extent_tree, so we need to take the
8692          * duplicate out and use that as the extent_record since the only way we
8693          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8694          */
8695         remove_cache_extent(extent_cache, &rec->cache);
8696
8697         good = to_extent_record(rec->dups.next);
8698         list_del_init(&good->list);
8699         INIT_LIST_HEAD(&good->backrefs);
8700         INIT_LIST_HEAD(&good->dups);
8701         good->cache.start = good->start;
8702         good->cache.size = good->nr;
8703         good->content_checked = 0;
8704         good->owner_ref_checked = 0;
8705         good->num_duplicates = 0;
8706         good->refs = rec->refs;
8707         list_splice_init(&rec->backrefs, &good->backrefs);
8708         while (1) {
8709                 cache = lookup_cache_extent(extent_cache, good->start,
8710                                             good->nr);
8711                 if (!cache)
8712                         break;
8713                 tmp = container_of(cache, struct extent_record, cache);
8714
8715                 /*
8716                  * If we find another overlapping extent and it's found_rec is
8717                  * set then it's a duplicate and we need to try and delete
8718                  * something.
8719                  */
8720                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8721                         if (list_empty(&good->list))
8722                                 list_add_tail(&good->list,
8723                                               &duplicate_extents);
8724                         good->num_duplicates += tmp->num_duplicates + 1;
8725                         list_splice_init(&tmp->dups, &good->dups);
8726                         list_del_init(&tmp->list);
8727                         list_add_tail(&tmp->list, &good->dups);
8728                         remove_cache_extent(extent_cache, &tmp->cache);
8729                         continue;
8730                 }
8731
8732                 /*
8733                  * Ok we have another non extent item backed extent rec, so lets
8734                  * just add it to this extent and carry on like we did above.
8735                  */
8736                 good->refs += tmp->refs;
8737                 list_splice_init(&tmp->backrefs, &good->backrefs);
8738                 remove_cache_extent(extent_cache, &tmp->cache);
8739                 free(tmp);
8740         }
8741         ret = insert_cache_extent(extent_cache, &good->cache);
8742         BUG_ON(ret);
8743         free(rec);
8744         return good->num_duplicates ? 0 : 1;
8745 }
8746
8747 static int delete_duplicate_records(struct btrfs_root *root,
8748                                     struct extent_record *rec)
8749 {
8750         struct btrfs_trans_handle *trans;
8751         LIST_HEAD(delete_list);
8752         struct btrfs_path path;
8753         struct extent_record *tmp, *good, *n;
8754         int nr_del = 0;
8755         int ret = 0, err;
8756         struct btrfs_key key;
8757
8758         btrfs_init_path(&path);
8759
8760         good = rec;
8761         /* Find the record that covers all of the duplicates. */
8762         list_for_each_entry(tmp, &rec->dups, list) {
8763                 if (good->start < tmp->start)
8764                         continue;
8765                 if (good->nr > tmp->nr)
8766                         continue;
8767
8768                 if (tmp->start + tmp->nr < good->start + good->nr) {
8769                         fprintf(stderr, "Ok we have overlapping extents that "
8770                                 "aren't completely covered by each other, this "
8771                                 "is going to require more careful thought.  "
8772                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8773                                 tmp->start, tmp->nr, good->start, good->nr);
8774                         abort();
8775                 }
8776                 good = tmp;
8777         }
8778
8779         if (good != rec)
8780                 list_add_tail(&rec->list, &delete_list);
8781
8782         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8783                 if (tmp == good)
8784                         continue;
8785                 list_move_tail(&tmp->list, &delete_list);
8786         }
8787
8788         root = root->fs_info->extent_root;
8789         trans = btrfs_start_transaction(root, 1);
8790         if (IS_ERR(trans)) {
8791                 ret = PTR_ERR(trans);
8792                 goto out;
8793         }
8794
8795         list_for_each_entry(tmp, &delete_list, list) {
8796                 if (tmp->found_rec == 0)
8797                         continue;
8798                 key.objectid = tmp->start;
8799                 key.type = BTRFS_EXTENT_ITEM_KEY;
8800                 key.offset = tmp->nr;
8801
8802                 /* Shouldn't happen but just in case */
8803                 if (tmp->metadata) {
8804                         fprintf(stderr, "Well this shouldn't happen, extent "
8805                                 "record overlaps but is metadata? "
8806                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8807                         abort();
8808                 }
8809
8810                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8811                 if (ret) {
8812                         if (ret > 0)
8813                                 ret = -EINVAL;
8814                         break;
8815                 }
8816                 ret = btrfs_del_item(trans, root, &path);
8817                 if (ret)
8818                         break;
8819                 btrfs_release_path(&path);
8820                 nr_del++;
8821         }
8822         err = btrfs_commit_transaction(trans, root);
8823         if (err && !ret)
8824                 ret = err;
8825 out:
8826         while (!list_empty(&delete_list)) {
8827                 tmp = to_extent_record(delete_list.next);
8828                 list_del_init(&tmp->list);
8829                 if (tmp == rec)
8830                         continue;
8831                 free(tmp);
8832         }
8833
8834         while (!list_empty(&rec->dups)) {
8835                 tmp = to_extent_record(rec->dups.next);
8836                 list_del_init(&tmp->list);
8837                 free(tmp);
8838         }
8839
8840         btrfs_release_path(&path);
8841
8842         if (!ret && !nr_del)
8843                 rec->num_duplicates = 0;
8844
8845         return ret ? ret : nr_del;
8846 }
8847
8848 static int find_possible_backrefs(struct btrfs_fs_info *info,
8849                                   struct btrfs_path *path,
8850                                   struct cache_tree *extent_cache,
8851                                   struct extent_record *rec)
8852 {
8853         struct btrfs_root *root;
8854         struct extent_backref *back;
8855         struct data_backref *dback;
8856         struct cache_extent *cache;
8857         struct btrfs_file_extent_item *fi;
8858         struct btrfs_key key;
8859         u64 bytenr, bytes;
8860         int ret;
8861
8862         list_for_each_entry(back, &rec->backrefs, list) {
8863                 /* Don't care about full backrefs (poor unloved backrefs) */
8864                 if (back->full_backref || !back->is_data)
8865                         continue;
8866
8867                 dback = to_data_backref(back);
8868
8869                 /* We found this one, we don't need to do a lookup */
8870                 if (dback->found_ref)
8871                         continue;
8872
8873                 key.objectid = dback->root;
8874                 key.type = BTRFS_ROOT_ITEM_KEY;
8875                 key.offset = (u64)-1;
8876
8877                 root = btrfs_read_fs_root(info, &key);
8878
8879                 /* No root, definitely a bad ref, skip */
8880                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8881                         continue;
8882                 /* Other err, exit */
8883                 if (IS_ERR(root))
8884                         return PTR_ERR(root);
8885
8886                 key.objectid = dback->owner;
8887                 key.type = BTRFS_EXTENT_DATA_KEY;
8888                 key.offset = dback->offset;
8889                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8890                 if (ret) {
8891                         btrfs_release_path(path);
8892                         if (ret < 0)
8893                                 return ret;
8894                         /* Didn't find it, we can carry on */
8895                         ret = 0;
8896                         continue;
8897                 }
8898
8899                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8900                                     struct btrfs_file_extent_item);
8901                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8902                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8903                 btrfs_release_path(path);
8904                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8905                 if (cache) {
8906                         struct extent_record *tmp;
8907                         tmp = container_of(cache, struct extent_record, cache);
8908
8909                         /*
8910                          * If we found an extent record for the bytenr for this
8911                          * particular backref then we can't add it to our
8912                          * current extent record.  We only want to add backrefs
8913                          * that don't have a corresponding extent item in the
8914                          * extent tree since they likely belong to this record
8915                          * and we need to fix it if it doesn't match bytenrs.
8916                          */
8917                         if  (tmp->found_rec)
8918                                 continue;
8919                 }
8920
8921                 dback->found_ref += 1;
8922                 dback->disk_bytenr = bytenr;
8923                 dback->bytes = bytes;
8924
8925                 /*
8926                  * Set this so the verify backref code knows not to trust the
8927                  * values in this backref.
8928                  */
8929                 back->broken = 1;
8930         }
8931
8932         return 0;
8933 }
8934
8935 /*
8936  * Record orphan data ref into corresponding root.
8937  *
8938  * Return 0 if the extent item contains data ref and recorded.
8939  * Return 1 if the extent item contains no useful data ref
8940  *   On that case, it may contains only shared_dataref or metadata backref
8941  *   or the file extent exists(this should be handled by the extent bytenr
8942  *   recovery routine)
8943  * Return <0 if something goes wrong.
8944  */
8945 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8946                                       struct extent_record *rec)
8947 {
8948         struct btrfs_key key;
8949         struct btrfs_root *dest_root;
8950         struct extent_backref *back;
8951         struct data_backref *dback;
8952         struct orphan_data_extent *orphan;
8953         struct btrfs_path path;
8954         int recorded_data_ref = 0;
8955         int ret = 0;
8956
8957         if (rec->metadata)
8958                 return 1;
8959         btrfs_init_path(&path);
8960         list_for_each_entry(back, &rec->backrefs, list) {
8961                 if (back->full_backref || !back->is_data ||
8962                     !back->found_extent_tree)
8963                         continue;
8964                 dback = to_data_backref(back);
8965                 if (dback->found_ref)
8966                         continue;
8967                 key.objectid = dback->root;
8968                 key.type = BTRFS_ROOT_ITEM_KEY;
8969                 key.offset = (u64)-1;
8970
8971                 dest_root = btrfs_read_fs_root(fs_info, &key);
8972
8973                 /* For non-exist root we just skip it */
8974                 if (IS_ERR(dest_root) || !dest_root)
8975                         continue;
8976
8977                 key.objectid = dback->owner;
8978                 key.type = BTRFS_EXTENT_DATA_KEY;
8979                 key.offset = dback->offset;
8980
8981                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8982                 btrfs_release_path(&path);
8983                 /*
8984                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8985                  * we need to record it for inode/file extent rebuild.
8986                  * For ret > 0, we record it only for file extent rebuild.
8987                  * For ret == 0, the file extent exists but only bytenr
8988                  * mismatch, let the original bytenr fix routine to handle,
8989                  * don't record it.
8990                  */
8991                 if (ret == 0)
8992                         continue;
8993                 ret = 0;
8994                 orphan = malloc(sizeof(*orphan));
8995                 if (!orphan) {
8996                         ret = -ENOMEM;
8997                         goto out;
8998                 }
8999                 INIT_LIST_HEAD(&orphan->list);
9000                 orphan->root = dback->root;
9001                 orphan->objectid = dback->owner;
9002                 orphan->offset = dback->offset;
9003                 orphan->disk_bytenr = rec->cache.start;
9004                 orphan->disk_len = rec->cache.size;
9005                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9006                 recorded_data_ref = 1;
9007         }
9008 out:
9009         btrfs_release_path(&path);
9010         if (!ret)
9011                 return !recorded_data_ref;
9012         else
9013                 return ret;
9014 }
9015
9016 /*
9017  * when an incorrect extent item is found, this will delete
9018  * all of the existing entries for it and recreate them
9019  * based on what the tree scan found.
9020  */
9021 static int fixup_extent_refs(struct btrfs_fs_info *info,
9022                              struct cache_tree *extent_cache,
9023                              struct extent_record *rec)
9024 {
9025         struct btrfs_trans_handle *trans = NULL;
9026         int ret;
9027         struct btrfs_path path;
9028         struct list_head *cur = rec->backrefs.next;
9029         struct cache_extent *cache;
9030         struct extent_backref *back;
9031         int allocated = 0;
9032         u64 flags = 0;
9033
9034         if (rec->flag_block_full_backref)
9035                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9036
9037         btrfs_init_path(&path);
9038         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9039                 /*
9040                  * Sometimes the backrefs themselves are so broken they don't
9041                  * get attached to any meaningful rec, so first go back and
9042                  * check any of our backrefs that we couldn't find and throw
9043                  * them into the list if we find the backref so that
9044                  * verify_backrefs can figure out what to do.
9045                  */
9046                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9047                 if (ret < 0)
9048                         goto out;
9049         }
9050
9051         /* step one, make sure all of the backrefs agree */
9052         ret = verify_backrefs(info, &path, rec);
9053         if (ret < 0)
9054                 goto out;
9055
9056         trans = btrfs_start_transaction(info->extent_root, 1);
9057         if (IS_ERR(trans)) {
9058                 ret = PTR_ERR(trans);
9059                 goto out;
9060         }
9061
9062         /* step two, delete all the existing records */
9063         ret = delete_extent_records(trans, info->extent_root, &path,
9064                                     rec->start);
9065
9066         if (ret < 0)
9067                 goto out;
9068
9069         /* was this block corrupt?  If so, don't add references to it */
9070         cache = lookup_cache_extent(info->corrupt_blocks,
9071                                     rec->start, rec->max_size);
9072         if (cache) {
9073                 ret = 0;
9074                 goto out;
9075         }
9076
9077         /* step three, recreate all the refs we did find */
9078         while(cur != &rec->backrefs) {
9079                 back = to_extent_backref(cur);
9080                 cur = cur->next;
9081
9082                 /*
9083                  * if we didn't find any references, don't create a
9084                  * new extent record
9085                  */
9086                 if (!back->found_ref)
9087                         continue;
9088
9089                 rec->bad_full_backref = 0;
9090                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9091                 allocated = 1;
9092
9093                 if (ret)
9094                         goto out;
9095         }
9096 out:
9097         if (trans) {
9098                 int err = btrfs_commit_transaction(trans, info->extent_root);
9099                 if (!ret)
9100                         ret = err;
9101         }
9102
9103         if (!ret)
9104                 fprintf(stderr, "Repaired extent references for %llu\n",
9105                                 (unsigned long long)rec->start);
9106
9107         btrfs_release_path(&path);
9108         return ret;
9109 }
9110
9111 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9112                               struct extent_record *rec)
9113 {
9114         struct btrfs_trans_handle *trans;
9115         struct btrfs_root *root = fs_info->extent_root;
9116         struct btrfs_path path;
9117         struct btrfs_extent_item *ei;
9118         struct btrfs_key key;
9119         u64 flags;
9120         int ret = 0;
9121
9122         key.objectid = rec->start;
9123         if (rec->metadata) {
9124                 key.type = BTRFS_METADATA_ITEM_KEY;
9125                 key.offset = rec->info_level;
9126         } else {
9127                 key.type = BTRFS_EXTENT_ITEM_KEY;
9128                 key.offset = rec->max_size;
9129         }
9130
9131         trans = btrfs_start_transaction(root, 0);
9132         if (IS_ERR(trans))
9133                 return PTR_ERR(trans);
9134
9135         btrfs_init_path(&path);
9136         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9137         if (ret < 0) {
9138                 btrfs_release_path(&path);
9139                 btrfs_commit_transaction(trans, root);
9140                 return ret;
9141         } else if (ret) {
9142                 fprintf(stderr, "Didn't find extent for %llu\n",
9143                         (unsigned long long)rec->start);
9144                 btrfs_release_path(&path);
9145                 btrfs_commit_transaction(trans, root);
9146                 return -ENOENT;
9147         }
9148
9149         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9150                             struct btrfs_extent_item);
9151         flags = btrfs_extent_flags(path.nodes[0], ei);
9152         if (rec->flag_block_full_backref) {
9153                 fprintf(stderr, "setting full backref on %llu\n",
9154                         (unsigned long long)key.objectid);
9155                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9156         } else {
9157                 fprintf(stderr, "clearing full backref on %llu\n",
9158                         (unsigned long long)key.objectid);
9159                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9160         }
9161         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9162         btrfs_mark_buffer_dirty(path.nodes[0]);
9163         btrfs_release_path(&path);
9164         ret = btrfs_commit_transaction(trans, root);
9165         if (!ret)
9166                 fprintf(stderr, "Repaired extent flags for %llu\n",
9167                                 (unsigned long long)rec->start);
9168
9169         return ret;
9170 }
9171
9172 /* right now we only prune from the extent allocation tree */
9173 static int prune_one_block(struct btrfs_trans_handle *trans,
9174                            struct btrfs_fs_info *info,
9175                            struct btrfs_corrupt_block *corrupt)
9176 {
9177         int ret;
9178         struct btrfs_path path;
9179         struct extent_buffer *eb;
9180         u64 found;
9181         int slot;
9182         int nritems;
9183         int level = corrupt->level + 1;
9184
9185         btrfs_init_path(&path);
9186 again:
9187         /* we want to stop at the parent to our busted block */
9188         path.lowest_level = level;
9189
9190         ret = btrfs_search_slot(trans, info->extent_root,
9191                                 &corrupt->key, &path, -1, 1);
9192
9193         if (ret < 0)
9194                 goto out;
9195
9196         eb = path.nodes[level];
9197         if (!eb) {
9198                 ret = -ENOENT;
9199                 goto out;
9200         }
9201
9202         /*
9203          * hopefully the search gave us the block we want to prune,
9204          * lets try that first
9205          */
9206         slot = path.slots[level];
9207         found =  btrfs_node_blockptr(eb, slot);
9208         if (found == corrupt->cache.start)
9209                 goto del_ptr;
9210
9211         nritems = btrfs_header_nritems(eb);
9212
9213         /* the search failed, lets scan this node and hope we find it */
9214         for (slot = 0; slot < nritems; slot++) {
9215                 found =  btrfs_node_blockptr(eb, slot);
9216                 if (found == corrupt->cache.start)
9217                         goto del_ptr;
9218         }
9219         /*
9220          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9221          * to this block
9222          */
9223         if (eb == info->extent_root->node) {
9224                 ret = -ENOENT;
9225                 goto out;
9226         } else {
9227                 level++;
9228                 btrfs_release_path(&path);
9229                 goto again;
9230         }
9231
9232 del_ptr:
9233         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9234         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9235
9236 out:
9237         btrfs_release_path(&path);
9238         return ret;
9239 }
9240
9241 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9242 {
9243         struct btrfs_trans_handle *trans = NULL;
9244         struct cache_extent *cache;
9245         struct btrfs_corrupt_block *corrupt;
9246
9247         while (1) {
9248                 cache = search_cache_extent(info->corrupt_blocks, 0);
9249                 if (!cache)
9250                         break;
9251                 if (!trans) {
9252                         trans = btrfs_start_transaction(info->extent_root, 1);
9253                         if (IS_ERR(trans))
9254                                 return PTR_ERR(trans);
9255                 }
9256                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9257                 prune_one_block(trans, info, corrupt);
9258                 remove_cache_extent(info->corrupt_blocks, cache);
9259         }
9260         if (trans)
9261                 return btrfs_commit_transaction(trans, info->extent_root);
9262         return 0;
9263 }
9264
9265 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9266 {
9267         struct btrfs_block_group_cache *cache;
9268         u64 start, end;
9269         int ret;
9270
9271         while (1) {
9272                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9273                                             &start, &end, EXTENT_DIRTY);
9274                 if (ret)
9275                         break;
9276                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9277         }
9278
9279         start = 0;
9280         while (1) {
9281                 cache = btrfs_lookup_first_block_group(fs_info, start);
9282                 if (!cache)
9283                         break;
9284                 if (cache->cached)
9285                         cache->cached = 0;
9286                 start = cache->key.objectid + cache->key.offset;
9287         }
9288 }
9289
9290 static int check_extent_refs(struct btrfs_root *root,
9291                              struct cache_tree *extent_cache)
9292 {
9293         struct extent_record *rec;
9294         struct cache_extent *cache;
9295         int ret = 0;
9296         int had_dups = 0;
9297
9298         if (repair) {
9299                 /*
9300                  * if we're doing a repair, we have to make sure
9301                  * we don't allocate from the problem extents.
9302                  * In the worst case, this will be all the
9303                  * extents in the FS
9304                  */
9305                 cache = search_cache_extent(extent_cache, 0);
9306                 while(cache) {
9307                         rec = container_of(cache, struct extent_record, cache);
9308                         set_extent_dirty(root->fs_info->excluded_extents,
9309                                          rec->start,
9310                                          rec->start + rec->max_size - 1);
9311                         cache = next_cache_extent(cache);
9312                 }
9313
9314                 /* pin down all the corrupted blocks too */
9315                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9316                 while(cache) {
9317                         set_extent_dirty(root->fs_info->excluded_extents,
9318                                          cache->start,
9319                                          cache->start + cache->size - 1);
9320                         cache = next_cache_extent(cache);
9321                 }
9322                 prune_corrupt_blocks(root->fs_info);
9323                 reset_cached_block_groups(root->fs_info);
9324         }
9325
9326         reset_cached_block_groups(root->fs_info);
9327
9328         /*
9329          * We need to delete any duplicate entries we find first otherwise we
9330          * could mess up the extent tree when we have backrefs that actually
9331          * belong to a different extent item and not the weird duplicate one.
9332          */
9333         while (repair && !list_empty(&duplicate_extents)) {
9334                 rec = to_extent_record(duplicate_extents.next);
9335                 list_del_init(&rec->list);
9336
9337                 /* Sometimes we can find a backref before we find an actual
9338                  * extent, so we need to process it a little bit to see if there
9339                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9340                  * if this is a backref screwup.  If we need to delete stuff
9341                  * process_duplicates() will return 0, otherwise it will return
9342                  * 1 and we
9343                  */
9344                 if (process_duplicates(extent_cache, rec))
9345                         continue;
9346                 ret = delete_duplicate_records(root, rec);
9347                 if (ret < 0)
9348                         return ret;
9349                 /*
9350                  * delete_duplicate_records will return the number of entries
9351                  * deleted, so if it's greater than 0 then we know we actually
9352                  * did something and we need to remove.
9353                  */
9354                 if (ret)
9355                         had_dups = 1;
9356         }
9357
9358         if (had_dups)
9359                 return -EAGAIN;
9360
9361         while(1) {
9362                 int cur_err = 0;
9363                 int fix = 0;
9364
9365                 cache = search_cache_extent(extent_cache, 0);
9366                 if (!cache)
9367                         break;
9368                 rec = container_of(cache, struct extent_record, cache);
9369                 if (rec->num_duplicates) {
9370                         fprintf(stderr, "extent item %llu has multiple extent "
9371                                 "items\n", (unsigned long long)rec->start);
9372                         cur_err = 1;
9373                 }
9374
9375                 if (rec->refs != rec->extent_item_refs) {
9376                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9377                                 (unsigned long long)rec->start,
9378                                 (unsigned long long)rec->nr);
9379                         fprintf(stderr, "extent item %llu, found %llu\n",
9380                                 (unsigned long long)rec->extent_item_refs,
9381                                 (unsigned long long)rec->refs);
9382                         ret = record_orphan_data_extents(root->fs_info, rec);
9383                         if (ret < 0)
9384                                 goto repair_abort;
9385                         fix = ret;
9386                         cur_err = 1;
9387                 }
9388                 if (all_backpointers_checked(rec, 1)) {
9389                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9390                                 (unsigned long long)rec->start,
9391                                 (unsigned long long)rec->nr);
9392                         fix = 1;
9393                         cur_err = 1;
9394                 }
9395                 if (!rec->owner_ref_checked) {
9396                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9397                                 (unsigned long long)rec->start,
9398                                 (unsigned long long)rec->nr);
9399                         fix = 1;
9400                         cur_err = 1;
9401                 }
9402
9403                 if (repair && fix) {
9404                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9405                         if (ret)
9406                                 goto repair_abort;
9407                 }
9408
9409
9410                 if (rec->bad_full_backref) {
9411                         fprintf(stderr, "bad full backref, on [%llu]\n",
9412                                 (unsigned long long)rec->start);
9413                         if (repair) {
9414                                 ret = fixup_extent_flags(root->fs_info, rec);
9415                                 if (ret)
9416                                         goto repair_abort;
9417                                 fix = 1;
9418                         }
9419                         cur_err = 1;
9420                 }
9421                 /*
9422                  * Although it's not a extent ref's problem, we reuse this
9423                  * routine for error reporting.
9424                  * No repair function yet.
9425                  */
9426                 if (rec->crossing_stripes) {
9427                         fprintf(stderr,
9428                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9429                                 rec->start, rec->start + rec->max_size);
9430                         cur_err = 1;
9431                 }
9432
9433                 if (rec->wrong_chunk_type) {
9434                         fprintf(stderr,
9435                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9436                                 rec->start, rec->start + rec->max_size);
9437                         cur_err = 1;
9438                 }
9439
9440                 remove_cache_extent(extent_cache, cache);
9441                 free_all_extent_backrefs(rec);
9442                 if (!init_extent_tree && repair && (!cur_err || fix))
9443                         clear_extent_dirty(root->fs_info->excluded_extents,
9444                                            rec->start,
9445                                            rec->start + rec->max_size - 1);
9446                 free(rec);
9447         }
9448 repair_abort:
9449         if (repair) {
9450                 if (ret && ret != -EAGAIN) {
9451                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9452                         exit(1);
9453                 } else if (!ret) {
9454                         struct btrfs_trans_handle *trans;
9455
9456                         root = root->fs_info->extent_root;
9457                         trans = btrfs_start_transaction(root, 1);
9458                         if (IS_ERR(trans)) {
9459                                 ret = PTR_ERR(trans);
9460                                 goto repair_abort;
9461                         }
9462
9463                         btrfs_fix_block_accounting(trans, root);
9464                         ret = btrfs_commit_transaction(trans, root);
9465                         if (ret)
9466                                 goto repair_abort;
9467                 }
9468                 return ret;
9469         }
9470         return 0;
9471 }
9472
9473 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9474 {
9475         u64 stripe_size;
9476
9477         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9478                 stripe_size = length;
9479                 stripe_size /= num_stripes;
9480         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9481                 stripe_size = length * 2;
9482                 stripe_size /= num_stripes;
9483         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9484                 stripe_size = length;
9485                 stripe_size /= (num_stripes - 1);
9486         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9487                 stripe_size = length;
9488                 stripe_size /= (num_stripes - 2);
9489         } else {
9490                 stripe_size = length;
9491         }
9492         return stripe_size;
9493 }
9494
9495 /*
9496  * Check the chunk with its block group/dev list ref:
9497  * Return 0 if all refs seems valid.
9498  * Return 1 if part of refs seems valid, need later check for rebuild ref
9499  * like missing block group and needs to search extent tree to rebuild them.
9500  * Return -1 if essential refs are missing and unable to rebuild.
9501  */
9502 static int check_chunk_refs(struct chunk_record *chunk_rec,
9503                             struct block_group_tree *block_group_cache,
9504                             struct device_extent_tree *dev_extent_cache,
9505                             int silent)
9506 {
9507         struct cache_extent *block_group_item;
9508         struct block_group_record *block_group_rec;
9509         struct cache_extent *dev_extent_item;
9510         struct device_extent_record *dev_extent_rec;
9511         u64 devid;
9512         u64 offset;
9513         u64 length;
9514         int metadump_v2 = 0;
9515         int i;
9516         int ret = 0;
9517
9518         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9519                                                chunk_rec->offset,
9520                                                chunk_rec->length);
9521         if (block_group_item) {
9522                 block_group_rec = container_of(block_group_item,
9523                                                struct block_group_record,
9524                                                cache);
9525                 if (chunk_rec->length != block_group_rec->offset ||
9526                     chunk_rec->offset != block_group_rec->objectid ||
9527                     (!metadump_v2 &&
9528                      chunk_rec->type_flags != block_group_rec->flags)) {
9529                         if (!silent)
9530                                 fprintf(stderr,
9531                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9532                                         chunk_rec->objectid,
9533                                         chunk_rec->type,
9534                                         chunk_rec->offset,
9535                                         chunk_rec->length,
9536                                         chunk_rec->offset,
9537                                         chunk_rec->type_flags,
9538                                         block_group_rec->objectid,
9539                                         block_group_rec->type,
9540                                         block_group_rec->offset,
9541                                         block_group_rec->offset,
9542                                         block_group_rec->objectid,
9543                                         block_group_rec->flags);
9544                         ret = -1;
9545                 } else {
9546                         list_del_init(&block_group_rec->list);
9547                         chunk_rec->bg_rec = block_group_rec;
9548                 }
9549         } else {
9550                 if (!silent)
9551                         fprintf(stderr,
9552                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9553                                 chunk_rec->objectid,
9554                                 chunk_rec->type,
9555                                 chunk_rec->offset,
9556                                 chunk_rec->length,
9557                                 chunk_rec->offset,
9558                                 chunk_rec->type_flags);
9559                 ret = 1;
9560         }
9561
9562         if (metadump_v2)
9563                 return ret;
9564
9565         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9566                                     chunk_rec->num_stripes);
9567         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9568                 devid = chunk_rec->stripes[i].devid;
9569                 offset = chunk_rec->stripes[i].offset;
9570                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9571                                                        devid, offset, length);
9572                 if (dev_extent_item) {
9573                         dev_extent_rec = container_of(dev_extent_item,
9574                                                 struct device_extent_record,
9575                                                 cache);
9576                         if (dev_extent_rec->objectid != devid ||
9577                             dev_extent_rec->offset != offset ||
9578                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9579                             dev_extent_rec->length != length) {
9580                                 if (!silent)
9581                                         fprintf(stderr,
9582                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9583                                                 chunk_rec->objectid,
9584                                                 chunk_rec->type,
9585                                                 chunk_rec->offset,
9586                                                 chunk_rec->stripes[i].devid,
9587                                                 chunk_rec->stripes[i].offset,
9588                                                 dev_extent_rec->objectid,
9589                                                 dev_extent_rec->offset,
9590                                                 dev_extent_rec->length);
9591                                 ret = -1;
9592                         } else {
9593                                 list_move(&dev_extent_rec->chunk_list,
9594                                           &chunk_rec->dextents);
9595                         }
9596                 } else {
9597                         if (!silent)
9598                                 fprintf(stderr,
9599                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9600                                         chunk_rec->objectid,
9601                                         chunk_rec->type,
9602                                         chunk_rec->offset,
9603                                         chunk_rec->stripes[i].devid,
9604                                         chunk_rec->stripes[i].offset);
9605                         ret = -1;
9606                 }
9607         }
9608         return ret;
9609 }
9610
9611 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9612 int check_chunks(struct cache_tree *chunk_cache,
9613                  struct block_group_tree *block_group_cache,
9614                  struct device_extent_tree *dev_extent_cache,
9615                  struct list_head *good, struct list_head *bad,
9616                  struct list_head *rebuild, int silent)
9617 {
9618         struct cache_extent *chunk_item;
9619         struct chunk_record *chunk_rec;
9620         struct block_group_record *bg_rec;
9621         struct device_extent_record *dext_rec;
9622         int err;
9623         int ret = 0;
9624
9625         chunk_item = first_cache_extent(chunk_cache);
9626         while (chunk_item) {
9627                 chunk_rec = container_of(chunk_item, struct chunk_record,
9628                                          cache);
9629                 err = check_chunk_refs(chunk_rec, block_group_cache,
9630                                        dev_extent_cache, silent);
9631                 if (err < 0)
9632                         ret = err;
9633                 if (err == 0 && good)
9634                         list_add_tail(&chunk_rec->list, good);
9635                 if (err > 0 && rebuild)
9636                         list_add_tail(&chunk_rec->list, rebuild);
9637                 if (err < 0 && bad)
9638                         list_add_tail(&chunk_rec->list, bad);
9639                 chunk_item = next_cache_extent(chunk_item);
9640         }
9641
9642         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9643                 if (!silent)
9644                         fprintf(stderr,
9645                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9646                                 bg_rec->objectid,
9647                                 bg_rec->offset,
9648                                 bg_rec->flags);
9649                 if (!ret)
9650                         ret = 1;
9651         }
9652
9653         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9654                             chunk_list) {
9655                 if (!silent)
9656                         fprintf(stderr,
9657                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9658                                 dext_rec->objectid,
9659                                 dext_rec->offset,
9660                                 dext_rec->length);
9661                 if (!ret)
9662                         ret = 1;
9663         }
9664         return ret;
9665 }
9666
9667
9668 static int check_device_used(struct device_record *dev_rec,
9669                              struct device_extent_tree *dext_cache)
9670 {
9671         struct cache_extent *cache;
9672         struct device_extent_record *dev_extent_rec;
9673         u64 total_byte = 0;
9674
9675         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9676         while (cache) {
9677                 dev_extent_rec = container_of(cache,
9678                                               struct device_extent_record,
9679                                               cache);
9680                 if (dev_extent_rec->objectid != dev_rec->devid)
9681                         break;
9682
9683                 list_del_init(&dev_extent_rec->device_list);
9684                 total_byte += dev_extent_rec->length;
9685                 cache = next_cache_extent(cache);
9686         }
9687
9688         if (total_byte != dev_rec->byte_used) {
9689                 fprintf(stderr,
9690                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9691                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9692                         dev_rec->type, dev_rec->offset);
9693                 return -1;
9694         } else {
9695                 return 0;
9696         }
9697 }
9698
9699 /* check btrfs_dev_item -> btrfs_dev_extent */
9700 static int check_devices(struct rb_root *dev_cache,
9701                          struct device_extent_tree *dev_extent_cache)
9702 {
9703         struct rb_node *dev_node;
9704         struct device_record *dev_rec;
9705         struct device_extent_record *dext_rec;
9706         int err;
9707         int ret = 0;
9708
9709         dev_node = rb_first(dev_cache);
9710         while (dev_node) {
9711                 dev_rec = container_of(dev_node, struct device_record, node);
9712                 err = check_device_used(dev_rec, dev_extent_cache);
9713                 if (err)
9714                         ret = err;
9715
9716                 dev_node = rb_next(dev_node);
9717         }
9718         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9719                             device_list) {
9720                 fprintf(stderr,
9721                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9722                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9723                 if (!ret)
9724                         ret = 1;
9725         }
9726         return ret;
9727 }
9728
9729 static int add_root_item_to_list(struct list_head *head,
9730                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9731                                   u8 level, u8 drop_level,
9732                                   int level_size, struct btrfs_key *drop_key)
9733 {
9734
9735         struct root_item_record *ri_rec;
9736         ri_rec = malloc(sizeof(*ri_rec));
9737         if (!ri_rec)
9738                 return -ENOMEM;
9739         ri_rec->bytenr = bytenr;
9740         ri_rec->objectid = objectid;
9741         ri_rec->level = level;
9742         ri_rec->level_size = level_size;
9743         ri_rec->drop_level = drop_level;
9744         ri_rec->last_snapshot = last_snapshot;
9745         if (drop_key)
9746                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9747         list_add_tail(&ri_rec->list, head);
9748
9749         return 0;
9750 }
9751
9752 static void free_root_item_list(struct list_head *list)
9753 {
9754         struct root_item_record *ri_rec;
9755
9756         while (!list_empty(list)) {
9757                 ri_rec = list_first_entry(list, struct root_item_record,
9758                                           list);
9759                 list_del_init(&ri_rec->list);
9760                 free(ri_rec);
9761         }
9762 }
9763
9764 static int deal_root_from_list(struct list_head *list,
9765                                struct btrfs_root *root,
9766                                struct block_info *bits,
9767                                int bits_nr,
9768                                struct cache_tree *pending,
9769                                struct cache_tree *seen,
9770                                struct cache_tree *reada,
9771                                struct cache_tree *nodes,
9772                                struct cache_tree *extent_cache,
9773                                struct cache_tree *chunk_cache,
9774                                struct rb_root *dev_cache,
9775                                struct block_group_tree *block_group_cache,
9776                                struct device_extent_tree *dev_extent_cache)
9777 {
9778         int ret = 0;
9779         u64 last;
9780
9781         while (!list_empty(list)) {
9782                 struct root_item_record *rec;
9783                 struct extent_buffer *buf;
9784                 rec = list_entry(list->next,
9785                                  struct root_item_record, list);
9786                 last = 0;
9787                 buf = read_tree_block(root->fs_info,
9788                                       rec->bytenr, rec->level_size, 0);
9789                 if (!extent_buffer_uptodate(buf)) {
9790                         free_extent_buffer(buf);
9791                         ret = -EIO;
9792                         break;
9793                 }
9794                 ret = add_root_to_pending(buf, extent_cache, pending,
9795                                     seen, nodes, rec->objectid);
9796                 if (ret < 0)
9797                         break;
9798                 /*
9799                  * To rebuild extent tree, we need deal with snapshot
9800                  * one by one, otherwise we deal with node firstly which
9801                  * can maximize readahead.
9802                  */
9803                 while (1) {
9804                         ret = run_next_block(root, bits, bits_nr, &last,
9805                                              pending, seen, reada, nodes,
9806                                              extent_cache, chunk_cache,
9807                                              dev_cache, block_group_cache,
9808                                              dev_extent_cache, rec);
9809                         if (ret != 0)
9810                                 break;
9811                 }
9812                 free_extent_buffer(buf);
9813                 list_del(&rec->list);
9814                 free(rec);
9815                 if (ret < 0)
9816                         break;
9817         }
9818         while (ret >= 0) {
9819                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9820                                      reada, nodes, extent_cache, chunk_cache,
9821                                      dev_cache, block_group_cache,
9822                                      dev_extent_cache, NULL);
9823                 if (ret != 0) {
9824                         if (ret > 0)
9825                                 ret = 0;
9826                         break;
9827                 }
9828         }
9829         return ret;
9830 }
9831
9832 static int check_chunks_and_extents(struct btrfs_root *root)
9833 {
9834         struct rb_root dev_cache;
9835         struct cache_tree chunk_cache;
9836         struct block_group_tree block_group_cache;
9837         struct device_extent_tree dev_extent_cache;
9838         struct cache_tree extent_cache;
9839         struct cache_tree seen;
9840         struct cache_tree pending;
9841         struct cache_tree reada;
9842         struct cache_tree nodes;
9843         struct extent_io_tree excluded_extents;
9844         struct cache_tree corrupt_blocks;
9845         struct btrfs_path path;
9846         struct btrfs_key key;
9847         struct btrfs_key found_key;
9848         int ret, err = 0;
9849         struct block_info *bits;
9850         int bits_nr;
9851         struct extent_buffer *leaf;
9852         int slot;
9853         struct btrfs_root_item ri;
9854         struct list_head dropping_trees;
9855         struct list_head normal_trees;
9856         struct btrfs_root *root1;
9857         u64 objectid;
9858         u32 level_size;
9859         u8 level;
9860
9861         dev_cache = RB_ROOT;
9862         cache_tree_init(&chunk_cache);
9863         block_group_tree_init(&block_group_cache);
9864         device_extent_tree_init(&dev_extent_cache);
9865
9866         cache_tree_init(&extent_cache);
9867         cache_tree_init(&seen);
9868         cache_tree_init(&pending);
9869         cache_tree_init(&nodes);
9870         cache_tree_init(&reada);
9871         cache_tree_init(&corrupt_blocks);
9872         extent_io_tree_init(&excluded_extents);
9873         INIT_LIST_HEAD(&dropping_trees);
9874         INIT_LIST_HEAD(&normal_trees);
9875
9876         if (repair) {
9877                 root->fs_info->excluded_extents = &excluded_extents;
9878                 root->fs_info->fsck_extent_cache = &extent_cache;
9879                 root->fs_info->free_extent_hook = free_extent_hook;
9880                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9881         }
9882
9883         bits_nr = 1024;
9884         bits = malloc(bits_nr * sizeof(struct block_info));
9885         if (!bits) {
9886                 perror("malloc");
9887                 exit(1);
9888         }
9889
9890         if (ctx.progress_enabled) {
9891                 ctx.tp = TASK_EXTENTS;
9892                 task_start(ctx.info);
9893         }
9894
9895 again:
9896         root1 = root->fs_info->tree_root;
9897         level = btrfs_header_level(root1->node);
9898         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9899                                     root1->node->start, 0, level, 0,
9900                                     root1->fs_info->nodesize, NULL);
9901         if (ret < 0)
9902                 goto out;
9903         root1 = root->fs_info->chunk_root;
9904         level = btrfs_header_level(root1->node);
9905         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9906                                     root1->node->start, 0, level, 0,
9907                                     root1->fs_info->nodesize, NULL);
9908         if (ret < 0)
9909                 goto out;
9910         btrfs_init_path(&path);
9911         key.offset = 0;
9912         key.objectid = 0;
9913         key.type = BTRFS_ROOT_ITEM_KEY;
9914         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9915                                         &key, &path, 0, 0);
9916         if (ret < 0)
9917                 goto out;
9918         while(1) {
9919                 leaf = path.nodes[0];
9920                 slot = path.slots[0];
9921                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9922                         ret = btrfs_next_leaf(root, &path);
9923                         if (ret != 0)
9924                                 break;
9925                         leaf = path.nodes[0];
9926                         slot = path.slots[0];
9927                 }
9928                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9929                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9930                         unsigned long offset;
9931                         u64 last_snapshot;
9932
9933                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9934                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9935                         last_snapshot = btrfs_root_last_snapshot(&ri);
9936                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9937                                 level = btrfs_root_level(&ri);
9938                                 level_size = root->fs_info->nodesize;
9939                                 ret = add_root_item_to_list(&normal_trees,
9940                                                 found_key.objectid,
9941                                                 btrfs_root_bytenr(&ri),
9942                                                 last_snapshot, level,
9943                                                 0, level_size, NULL);
9944                                 if (ret < 0)
9945                                         goto out;
9946                         } else {
9947                                 level = btrfs_root_level(&ri);
9948                                 level_size = root->fs_info->nodesize;
9949                                 objectid = found_key.objectid;
9950                                 btrfs_disk_key_to_cpu(&found_key,
9951                                                       &ri.drop_progress);
9952                                 ret = add_root_item_to_list(&dropping_trees,
9953                                                 objectid,
9954                                                 btrfs_root_bytenr(&ri),
9955                                                 last_snapshot, level,
9956                                                 ri.drop_level,
9957                                                 level_size, &found_key);
9958                                 if (ret < 0)
9959                                         goto out;
9960                         }
9961                 }
9962                 path.slots[0]++;
9963         }
9964         btrfs_release_path(&path);
9965
9966         /*
9967          * check_block can return -EAGAIN if it fixes something, please keep
9968          * this in mind when dealing with return values from these functions, if
9969          * we get -EAGAIN we want to fall through and restart the loop.
9970          */
9971         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9972                                   &seen, &reada, &nodes, &extent_cache,
9973                                   &chunk_cache, &dev_cache, &block_group_cache,
9974                                   &dev_extent_cache);
9975         if (ret < 0) {
9976                 if (ret == -EAGAIN)
9977                         goto loop;
9978                 goto out;
9979         }
9980         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9981                                   &pending, &seen, &reada, &nodes,
9982                                   &extent_cache, &chunk_cache, &dev_cache,
9983                                   &block_group_cache, &dev_extent_cache);
9984         if (ret < 0) {
9985                 if (ret == -EAGAIN)
9986                         goto loop;
9987                 goto out;
9988         }
9989
9990         ret = check_chunks(&chunk_cache, &block_group_cache,
9991                            &dev_extent_cache, NULL, NULL, NULL, 0);
9992         if (ret) {
9993                 if (ret == -EAGAIN)
9994                         goto loop;
9995                 err = ret;
9996         }
9997
9998         ret = check_extent_refs(root, &extent_cache);
9999         if (ret < 0) {
10000                 if (ret == -EAGAIN)
10001                         goto loop;
10002                 goto out;
10003         }
10004
10005         ret = check_devices(&dev_cache, &dev_extent_cache);
10006         if (ret && err)
10007                 ret = err;
10008
10009 out:
10010         task_stop(ctx.info);
10011         if (repair) {
10012                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10013                 extent_io_tree_cleanup(&excluded_extents);
10014                 root->fs_info->fsck_extent_cache = NULL;
10015                 root->fs_info->free_extent_hook = NULL;
10016                 root->fs_info->corrupt_blocks = NULL;
10017                 root->fs_info->excluded_extents = NULL;
10018         }
10019         free(bits);
10020         free_chunk_cache_tree(&chunk_cache);
10021         free_device_cache_tree(&dev_cache);
10022         free_block_group_tree(&block_group_cache);
10023         free_device_extent_tree(&dev_extent_cache);
10024         free_extent_cache_tree(&seen);
10025         free_extent_cache_tree(&pending);
10026         free_extent_cache_tree(&reada);
10027         free_extent_cache_tree(&nodes);
10028         free_root_item_list(&normal_trees);
10029         free_root_item_list(&dropping_trees);
10030         return ret;
10031 loop:
10032         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10033         free_extent_cache_tree(&seen);
10034         free_extent_cache_tree(&pending);
10035         free_extent_cache_tree(&reada);
10036         free_extent_cache_tree(&nodes);
10037         free_chunk_cache_tree(&chunk_cache);
10038         free_block_group_tree(&block_group_cache);
10039         free_device_cache_tree(&dev_cache);
10040         free_device_extent_tree(&dev_extent_cache);
10041         free_extent_record_cache(&extent_cache);
10042         free_root_item_list(&normal_trees);
10043         free_root_item_list(&dropping_trees);
10044         extent_io_tree_cleanup(&excluded_extents);
10045         goto again;
10046 }
10047
10048 /*
10049  * Check backrefs of a tree block given by @bytenr or @eb.
10050  *
10051  * @root:       the root containing the @bytenr or @eb
10052  * @eb:         tree block extent buffer, can be NULL
10053  * @bytenr:     bytenr of the tree block to search
10054  * @level:      tree level of the tree block
10055  * @owner:      owner of the tree block
10056  *
10057  * Return >0 for any error found and output error message
10058  * Return 0 for no error found
10059  */
10060 static int check_tree_block_ref(struct btrfs_root *root,
10061                                 struct extent_buffer *eb, u64 bytenr,
10062                                 int level, u64 owner)
10063 {
10064         struct btrfs_key key;
10065         struct btrfs_root *extent_root = root->fs_info->extent_root;
10066         struct btrfs_path path;
10067         struct btrfs_extent_item *ei;
10068         struct btrfs_extent_inline_ref *iref;
10069         struct extent_buffer *leaf;
10070         unsigned long end;
10071         unsigned long ptr;
10072         int slot;
10073         int skinny_level;
10074         int type;
10075         u32 nodesize = root->fs_info->nodesize;
10076         u32 item_size;
10077         u64 offset;
10078         int tree_reloc_root = 0;
10079         int found_ref = 0;
10080         int err = 0;
10081         int ret;
10082
10083         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10084             btrfs_header_bytenr(root->node) == bytenr)
10085                 tree_reloc_root = 1;
10086
10087         btrfs_init_path(&path);
10088         key.objectid = bytenr;
10089         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10090                 key.type = BTRFS_METADATA_ITEM_KEY;
10091         else
10092                 key.type = BTRFS_EXTENT_ITEM_KEY;
10093         key.offset = (u64)-1;
10094
10095         /* Search for the backref in extent tree */
10096         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10097         if (ret < 0) {
10098                 err |= BACKREF_MISSING;
10099                 goto out;
10100         }
10101         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10102         if (ret) {
10103                 err |= BACKREF_MISSING;
10104                 goto out;
10105         }
10106
10107         leaf = path.nodes[0];
10108         slot = path.slots[0];
10109         btrfs_item_key_to_cpu(leaf, &key, slot);
10110
10111         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10112
10113         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10114                 skinny_level = (int)key.offset;
10115                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10116         } else {
10117                 struct btrfs_tree_block_info *info;
10118
10119                 info = (struct btrfs_tree_block_info *)(ei + 1);
10120                 skinny_level = btrfs_tree_block_level(leaf, info);
10121                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10122         }
10123
10124         if (eb) {
10125                 u64 header_gen;
10126                 u64 extent_gen;
10127
10128                 if (!(btrfs_extent_flags(leaf, ei) &
10129                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10130                         error(
10131                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10132                                 key.objectid, nodesize,
10133                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10134                         err = BACKREF_MISMATCH;
10135                 }
10136                 header_gen = btrfs_header_generation(eb);
10137                 extent_gen = btrfs_extent_generation(leaf, ei);
10138                 if (header_gen != extent_gen) {
10139                         error(
10140         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10141                                 key.objectid, nodesize, header_gen,
10142                                 extent_gen);
10143                         err = BACKREF_MISMATCH;
10144                 }
10145                 if (level != skinny_level) {
10146                         error(
10147                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10148                                 key.objectid, nodesize, level, skinny_level);
10149                         err = BACKREF_MISMATCH;
10150                 }
10151                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10152                         error(
10153                         "extent[%llu %u] is referred by other roots than %llu",
10154                                 key.objectid, nodesize, root->objectid);
10155                         err = BACKREF_MISMATCH;
10156                 }
10157         }
10158
10159         /*
10160          * Iterate the extent/metadata item to find the exact backref
10161          */
10162         item_size = btrfs_item_size_nr(leaf, slot);
10163         ptr = (unsigned long)iref;
10164         end = (unsigned long)ei + item_size;
10165         while (ptr < end) {
10166                 iref = (struct btrfs_extent_inline_ref *)ptr;
10167                 type = btrfs_extent_inline_ref_type(leaf, iref);
10168                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10169
10170                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10171                         (offset == root->objectid || offset == owner)) {
10172                         found_ref = 1;
10173                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10174                         /*
10175                          * Backref of tree reloc root points to itself, no need
10176                          * to check backref any more.
10177                          */
10178                         if (tree_reloc_root)
10179                                 found_ref = 1;
10180                         else
10181                         /* Check if the backref points to valid referencer */
10182                                 found_ref = !check_tree_block_ref(root, NULL,
10183                                                 offset, level + 1, owner);
10184                 }
10185
10186                 if (found_ref)
10187                         break;
10188                 ptr += btrfs_extent_inline_ref_size(type);
10189         }
10190
10191         /*
10192          * Inlined extent item doesn't have what we need, check
10193          * TREE_BLOCK_REF_KEY
10194          */
10195         if (!found_ref) {
10196                 btrfs_release_path(&path);
10197                 key.objectid = bytenr;
10198                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10199                 key.offset = root->objectid;
10200
10201                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10202                 if (!ret)
10203                         found_ref = 1;
10204         }
10205         if (!found_ref)
10206                 err |= BACKREF_MISSING;
10207 out:
10208         btrfs_release_path(&path);
10209         if (eb && (err & BACKREF_MISSING))
10210                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10211                         bytenr, nodesize, owner, level);
10212         return err;
10213 }
10214
10215 /*
10216  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10217  *
10218  * Return >0 any error found and output error message
10219  * Return 0 for no error found
10220  */
10221 static int check_extent_data_item(struct btrfs_root *root,
10222                                   struct extent_buffer *eb, int slot)
10223 {
10224         struct btrfs_file_extent_item *fi;
10225         struct btrfs_path path;
10226         struct btrfs_root *extent_root = root->fs_info->extent_root;
10227         struct btrfs_key fi_key;
10228         struct btrfs_key dbref_key;
10229         struct extent_buffer *leaf;
10230         struct btrfs_extent_item *ei;
10231         struct btrfs_extent_inline_ref *iref;
10232         struct btrfs_extent_data_ref *dref;
10233         u64 owner;
10234         u64 disk_bytenr;
10235         u64 disk_num_bytes;
10236         u64 extent_num_bytes;
10237         u64 extent_flags;
10238         u32 item_size;
10239         unsigned long end;
10240         unsigned long ptr;
10241         int type;
10242         u64 ref_root;
10243         int found_dbackref = 0;
10244         int err = 0;
10245         int ret;
10246
10247         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10248         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10249
10250         /* Nothing to check for hole and inline data extents */
10251         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10252             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10253                 return 0;
10254
10255         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10256         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10257         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10258
10259         /* Check unaligned disk_num_bytes and num_bytes */
10260         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10261                 error(
10262 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10263                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10264                         root->fs_info->sectorsize);
10265                 err |= BYTES_UNALIGNED;
10266         } else {
10267                 data_bytes_allocated += disk_num_bytes;
10268         }
10269         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10270                 error(
10271 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10272                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10273                         root->fs_info->sectorsize);
10274                 err |= BYTES_UNALIGNED;
10275         } else {
10276                 data_bytes_referenced += extent_num_bytes;
10277         }
10278         owner = btrfs_header_owner(eb);
10279
10280         /* Check the extent item of the file extent in extent tree */
10281         btrfs_init_path(&path);
10282         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10283         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10284         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10285
10286         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10287         if (ret)
10288                 goto out;
10289
10290         leaf = path.nodes[0];
10291         slot = path.slots[0];
10292         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10293
10294         extent_flags = btrfs_extent_flags(leaf, ei);
10295
10296         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10297                 error(
10298                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10299                     disk_bytenr, disk_num_bytes,
10300                     BTRFS_EXTENT_FLAG_DATA);
10301                 err |= BACKREF_MISMATCH;
10302         }
10303
10304         /* Check data backref inside that extent item */
10305         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10306         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10307         ptr = (unsigned long)iref;
10308         end = (unsigned long)ei + item_size;
10309         while (ptr < end) {
10310                 iref = (struct btrfs_extent_inline_ref *)ptr;
10311                 type = btrfs_extent_inline_ref_type(leaf, iref);
10312                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10313
10314                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10315                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10316                         if (ref_root == owner || ref_root == root->objectid)
10317                                 found_dbackref = 1;
10318                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10319                         found_dbackref = !check_tree_block_ref(root, NULL,
10320                                 btrfs_extent_inline_ref_offset(leaf, iref),
10321                                 0, owner);
10322                 }
10323
10324                 if (found_dbackref)
10325                         break;
10326                 ptr += btrfs_extent_inline_ref_size(type);
10327         }
10328
10329         if (!found_dbackref) {
10330                 btrfs_release_path(&path);
10331
10332                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10333                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10334                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10335                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10336                                 fi_key.objectid, fi_key.offset);
10337
10338                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10339                                         &dbref_key, &path, 0, 0);
10340                 if (!ret) {
10341                         found_dbackref = 1;
10342                         goto out;
10343                 }
10344
10345                 btrfs_release_path(&path);
10346
10347                 /*
10348                  * Neither inlined nor EXTENT_DATA_REF found, try
10349                  * SHARED_DATA_REF as last chance.
10350                  */
10351                 dbref_key.objectid = disk_bytenr;
10352                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10353                 dbref_key.offset = eb->start;
10354
10355                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10356                                         &dbref_key, &path, 0, 0);
10357                 if (!ret) {
10358                         found_dbackref = 1;
10359                         goto out;
10360                 }
10361         }
10362
10363 out:
10364         if (!found_dbackref)
10365                 err |= BACKREF_MISSING;
10366         btrfs_release_path(&path);
10367         if (err & BACKREF_MISSING) {
10368                 error("data extent[%llu %llu] backref lost",
10369                       disk_bytenr, disk_num_bytes);
10370         }
10371         return err;
10372 }
10373
10374 /*
10375  * Get real tree block level for the case like shared block
10376  * Return >= 0 as tree level
10377  * Return <0 for error
10378  */
10379 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10380 {
10381         struct extent_buffer *eb;
10382         struct btrfs_path path;
10383         struct btrfs_key key;
10384         struct btrfs_extent_item *ei;
10385         u64 flags;
10386         u64 transid;
10387         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10388         u8 backref_level;
10389         u8 header_level;
10390         int ret;
10391
10392         /* Search extent tree for extent generation and level */
10393         key.objectid = bytenr;
10394         key.type = BTRFS_METADATA_ITEM_KEY;
10395         key.offset = (u64)-1;
10396
10397         btrfs_init_path(&path);
10398         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10399         if (ret < 0)
10400                 goto release_out;
10401         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10402         if (ret < 0)
10403                 goto release_out;
10404         if (ret > 0) {
10405                 ret = -ENOENT;
10406                 goto release_out;
10407         }
10408
10409         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10410         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10411                             struct btrfs_extent_item);
10412         flags = btrfs_extent_flags(path.nodes[0], ei);
10413         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10414                 ret = -ENOENT;
10415                 goto release_out;
10416         }
10417
10418         /* Get transid for later read_tree_block() check */
10419         transid = btrfs_extent_generation(path.nodes[0], ei);
10420
10421         /* Get backref level as one source */
10422         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10423                 backref_level = key.offset;
10424         } else {
10425                 struct btrfs_tree_block_info *info;
10426
10427                 info = (struct btrfs_tree_block_info *)(ei + 1);
10428                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10429         }
10430         btrfs_release_path(&path);
10431
10432         /* Get level from tree block as an alternative source */
10433         eb = read_tree_block(fs_info, bytenr, nodesize, transid);
10434         if (!extent_buffer_uptodate(eb)) {
10435                 free_extent_buffer(eb);
10436                 return -EIO;
10437         }
10438         header_level = btrfs_header_level(eb);
10439         free_extent_buffer(eb);
10440
10441         if (header_level != backref_level)
10442                 return -EIO;
10443         return header_level;
10444
10445 release_out:
10446         btrfs_release_path(&path);
10447         return ret;
10448 }
10449
10450 /*
10451  * Check if a tree block backref is valid (points to a valid tree block)
10452  * if level == -1, level will be resolved
10453  * Return >0 for any error found and print error message
10454  */
10455 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10456                                     u64 bytenr, int level)
10457 {
10458         struct btrfs_root *root;
10459         struct btrfs_key key;
10460         struct btrfs_path path;
10461         struct extent_buffer *eb;
10462         struct extent_buffer *node;
10463         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10464         int err = 0;
10465         int ret;
10466
10467         /* Query level for level == -1 special case */
10468         if (level == -1)
10469                 level = query_tree_block_level(fs_info, bytenr);
10470         if (level < 0) {
10471                 err |= REFERENCER_MISSING;
10472                 goto out;
10473         }
10474
10475         key.objectid = root_id;
10476         key.type = BTRFS_ROOT_ITEM_KEY;
10477         key.offset = (u64)-1;
10478
10479         root = btrfs_read_fs_root(fs_info, &key);
10480         if (IS_ERR(root)) {
10481                 err |= REFERENCER_MISSING;
10482                 goto out;
10483         }
10484
10485         /* Read out the tree block to get item/node key */
10486         eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10487         if (!extent_buffer_uptodate(eb)) {
10488                 err |= REFERENCER_MISSING;
10489                 free_extent_buffer(eb);
10490                 goto out;
10491         }
10492
10493         /* Empty tree, no need to check key */
10494         if (!btrfs_header_nritems(eb) && !level) {
10495                 free_extent_buffer(eb);
10496                 goto out;
10497         }
10498
10499         if (level)
10500                 btrfs_node_key_to_cpu(eb, &key, 0);
10501         else
10502                 btrfs_item_key_to_cpu(eb, &key, 0);
10503
10504         free_extent_buffer(eb);
10505
10506         btrfs_init_path(&path);
10507         path.lowest_level = level;
10508         /* Search with the first key, to ensure we can reach it */
10509         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10510         if (ret < 0) {
10511                 err |= REFERENCER_MISSING;
10512                 goto release_out;
10513         }
10514
10515         node = path.nodes[level];
10516         if (btrfs_header_bytenr(node) != bytenr) {
10517                 error(
10518         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10519                         bytenr, nodesize, bytenr,
10520                         btrfs_header_bytenr(node));
10521                 err |= REFERENCER_MISMATCH;
10522         }
10523         if (btrfs_header_level(node) != level) {
10524                 error(
10525         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10526                         bytenr, nodesize, level,
10527                         btrfs_header_level(node));
10528                 err |= REFERENCER_MISMATCH;
10529         }
10530
10531 release_out:
10532         btrfs_release_path(&path);
10533 out:
10534         if (err & REFERENCER_MISSING) {
10535                 if (level < 0)
10536                         error("extent [%llu %d] lost referencer (owner: %llu)",
10537                                 bytenr, nodesize, root_id);
10538                 else
10539                         error(
10540                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10541                                 bytenr, nodesize, root_id, level);
10542         }
10543
10544         return err;
10545 }
10546
10547 /*
10548  * Check if tree block @eb is tree reloc root.
10549  * Return 0 if it's not or any problem happens
10550  * Return 1 if it's a tree reloc root
10551  */
10552 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10553                                  struct extent_buffer *eb)
10554 {
10555         struct btrfs_root *tree_reloc_root;
10556         struct btrfs_key key;
10557         u64 bytenr = btrfs_header_bytenr(eb);
10558         u64 owner = btrfs_header_owner(eb);
10559         int ret = 0;
10560
10561         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10562         key.offset = owner;
10563         key.type = BTRFS_ROOT_ITEM_KEY;
10564
10565         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10566         if (IS_ERR(tree_reloc_root))
10567                 return 0;
10568
10569         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10570                 ret = 1;
10571         btrfs_free_fs_root(tree_reloc_root);
10572         return ret;
10573 }
10574
10575 /*
10576  * Check referencer for shared block backref
10577  * If level == -1, this function will resolve the level.
10578  */
10579 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10580                                      u64 parent, u64 bytenr, int level)
10581 {
10582         struct extent_buffer *eb;
10583         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10584         u32 nr;
10585         int found_parent = 0;
10586         int i;
10587
10588         eb = read_tree_block(fs_info, parent, nodesize, 0);
10589         if (!extent_buffer_uptodate(eb))
10590                 goto out;
10591
10592         if (level == -1)
10593                 level = query_tree_block_level(fs_info, bytenr);
10594         if (level < 0)
10595                 goto out;
10596
10597         /* It's possible it's a tree reloc root */
10598         if (parent == bytenr) {
10599                 if (is_tree_reloc_root(fs_info, eb))
10600                         found_parent = 1;
10601                 goto out;
10602         }
10603
10604         if (level + 1 != btrfs_header_level(eb))
10605                 goto out;
10606
10607         nr = btrfs_header_nritems(eb);
10608         for (i = 0; i < nr; i++) {
10609                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10610                         found_parent = 1;
10611                         break;
10612                 }
10613         }
10614 out:
10615         free_extent_buffer(eb);
10616         if (!found_parent) {
10617                 error(
10618         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10619                         bytenr, nodesize, parent, level);
10620                 return REFERENCER_MISSING;
10621         }
10622         return 0;
10623 }
10624
10625 /*
10626  * Check referencer for normal (inlined) data ref
10627  * If len == 0, it will be resolved by searching in extent tree
10628  */
10629 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10630                                      u64 root_id, u64 objectid, u64 offset,
10631                                      u64 bytenr, u64 len, u32 count)
10632 {
10633         struct btrfs_root *root;
10634         struct btrfs_root *extent_root = fs_info->extent_root;
10635         struct btrfs_key key;
10636         struct btrfs_path path;
10637         struct extent_buffer *leaf;
10638         struct btrfs_file_extent_item *fi;
10639         u32 found_count = 0;
10640         int slot;
10641         int ret = 0;
10642
10643         if (!len) {
10644                 key.objectid = bytenr;
10645                 key.type = BTRFS_EXTENT_ITEM_KEY;
10646                 key.offset = (u64)-1;
10647
10648                 btrfs_init_path(&path);
10649                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10650                 if (ret < 0)
10651                         goto out;
10652                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10653                 if (ret)
10654                         goto out;
10655                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10656                 if (key.objectid != bytenr ||
10657                     key.type != BTRFS_EXTENT_ITEM_KEY)
10658                         goto out;
10659                 len = key.offset;
10660                 btrfs_release_path(&path);
10661         }
10662         key.objectid = root_id;
10663         key.type = BTRFS_ROOT_ITEM_KEY;
10664         key.offset = (u64)-1;
10665         btrfs_init_path(&path);
10666
10667         root = btrfs_read_fs_root(fs_info, &key);
10668         if (IS_ERR(root))
10669                 goto out;
10670
10671         key.objectid = objectid;
10672         key.type = BTRFS_EXTENT_DATA_KEY;
10673         /*
10674          * It can be nasty as data backref offset is
10675          * file offset - file extent offset, which is smaller or
10676          * equal to original backref offset.  The only special case is
10677          * overflow.  So we need to special check and do further search.
10678          */
10679         key.offset = offset & (1ULL << 63) ? 0 : offset;
10680
10681         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10682         if (ret < 0)
10683                 goto out;
10684
10685         /*
10686          * Search afterwards to get correct one
10687          * NOTE: As we must do a comprehensive check on the data backref to
10688          * make sure the dref count also matches, we must iterate all file
10689          * extents for that inode.
10690          */
10691         while (1) {
10692                 leaf = path.nodes[0];
10693                 slot = path.slots[0];
10694
10695                 if (slot >= btrfs_header_nritems(leaf))
10696                         goto next;
10697                 btrfs_item_key_to_cpu(leaf, &key, slot);
10698                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10699                         break;
10700                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10701                 /*
10702                  * Except normal disk bytenr and disk num bytes, we still
10703                  * need to do extra check on dbackref offset as
10704                  * dbackref offset = file_offset - file_extent_offset
10705                  */
10706                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10707                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10708                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10709                     offset)
10710                         found_count++;
10711
10712 next:
10713                 ret = btrfs_next_item(root, &path);
10714                 if (ret)
10715                         break;
10716         }
10717 out:
10718         btrfs_release_path(&path);
10719         if (found_count != count) {
10720                 error(
10721 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10722                         bytenr, len, root_id, objectid, offset, count, found_count);
10723                 return REFERENCER_MISSING;
10724         }
10725         return 0;
10726 }
10727
10728 /*
10729  * Check if the referencer of a shared data backref exists
10730  */
10731 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10732                                      u64 parent, u64 bytenr)
10733 {
10734         struct extent_buffer *eb;
10735         struct btrfs_key key;
10736         struct btrfs_file_extent_item *fi;
10737         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10738         u32 nr;
10739         int found_parent = 0;
10740         int i;
10741
10742         eb = read_tree_block(fs_info, parent, nodesize, 0);
10743         if (!extent_buffer_uptodate(eb))
10744                 goto out;
10745
10746         nr = btrfs_header_nritems(eb);
10747         for (i = 0; i < nr; i++) {
10748                 btrfs_item_key_to_cpu(eb, &key, i);
10749                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10750                         continue;
10751
10752                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10753                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10754                         continue;
10755
10756                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10757                         found_parent = 1;
10758                         break;
10759                 }
10760         }
10761
10762 out:
10763         free_extent_buffer(eb);
10764         if (!found_parent) {
10765                 error("shared extent %llu referencer lost (parent: %llu)",
10766                         bytenr, parent);
10767                 return REFERENCER_MISSING;
10768         }
10769         return 0;
10770 }
10771
10772 /*
10773  * This function will check a given extent item, including its backref and
10774  * itself (like crossing stripe boundary and type)
10775  *
10776  * Since we don't use extent_record anymore, introduce new error bit
10777  */
10778 static int check_extent_item(struct btrfs_fs_info *fs_info,
10779                              struct extent_buffer *eb, int slot)
10780 {
10781         struct btrfs_extent_item *ei;
10782         struct btrfs_extent_inline_ref *iref;
10783         struct btrfs_extent_data_ref *dref;
10784         unsigned long end;
10785         unsigned long ptr;
10786         int type;
10787         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10788         u32 item_size = btrfs_item_size_nr(eb, slot);
10789         u64 flags;
10790         u64 offset;
10791         int metadata = 0;
10792         int level;
10793         struct btrfs_key key;
10794         int ret;
10795         int err = 0;
10796
10797         btrfs_item_key_to_cpu(eb, &key, slot);
10798         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10799                 bytes_used += key.offset;
10800         else
10801                 bytes_used += nodesize;
10802
10803         if (item_size < sizeof(*ei)) {
10804                 /*
10805                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10806                  * old thing when on disk format is still un-determined.
10807                  * No need to care about it anymore
10808                  */
10809                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10810                 return -ENOTTY;
10811         }
10812
10813         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10814         flags = btrfs_extent_flags(eb, ei);
10815
10816         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10817                 metadata = 1;
10818         if (metadata && check_crossing_stripes(global_info, key.objectid,
10819                                                eb->len)) {
10820                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10821                       key.objectid, key.objectid + nodesize);
10822                 err |= CROSSING_STRIPE_BOUNDARY;
10823         }
10824
10825         ptr = (unsigned long)(ei + 1);
10826
10827         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10828                 /* Old EXTENT_ITEM metadata */
10829                 struct btrfs_tree_block_info *info;
10830
10831                 info = (struct btrfs_tree_block_info *)ptr;
10832                 level = btrfs_tree_block_level(eb, info);
10833                 ptr += sizeof(struct btrfs_tree_block_info);
10834         } else {
10835                 /* New METADATA_ITEM */
10836                 level = key.offset;
10837         }
10838         end = (unsigned long)ei + item_size;
10839
10840 next:
10841         /* Reached extent item end normally */
10842         if (ptr == end)
10843                 goto out;
10844
10845         /* Beyond extent item end, wrong item size */
10846         if (ptr > end) {
10847                 err |= ITEM_SIZE_MISMATCH;
10848                 error("extent item at bytenr %llu slot %d has wrong size",
10849                         eb->start, slot);
10850                 goto out;
10851         }
10852
10853         /* Now check every backref in this extent item */
10854         iref = (struct btrfs_extent_inline_ref *)ptr;
10855         type = btrfs_extent_inline_ref_type(eb, iref);
10856         offset = btrfs_extent_inline_ref_offset(eb, iref);
10857         switch (type) {
10858         case BTRFS_TREE_BLOCK_REF_KEY:
10859                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10860                                                level);
10861                 err |= ret;
10862                 break;
10863         case BTRFS_SHARED_BLOCK_REF_KEY:
10864                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10865                                                  level);
10866                 err |= ret;
10867                 break;
10868         case BTRFS_EXTENT_DATA_REF_KEY:
10869                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10870                 ret = check_extent_data_backref(fs_info,
10871                                 btrfs_extent_data_ref_root(eb, dref),
10872                                 btrfs_extent_data_ref_objectid(eb, dref),
10873                                 btrfs_extent_data_ref_offset(eb, dref),
10874                                 key.objectid, key.offset,
10875                                 btrfs_extent_data_ref_count(eb, dref));
10876                 err |= ret;
10877                 break;
10878         case BTRFS_SHARED_DATA_REF_KEY:
10879                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10880                 err |= ret;
10881                 break;
10882         default:
10883                 error("extent[%llu %d %llu] has unknown ref type: %d",
10884                         key.objectid, key.type, key.offset, type);
10885                 err |= UNKNOWN_TYPE;
10886                 goto out;
10887         }
10888
10889         ptr += btrfs_extent_inline_ref_size(type);
10890         goto next;
10891
10892 out:
10893         return err;
10894 }
10895
10896 /*
10897  * Check if a dev extent item is referred correctly by its chunk
10898  */
10899 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10900                                  struct extent_buffer *eb, int slot)
10901 {
10902         struct btrfs_root *chunk_root = fs_info->chunk_root;
10903         struct btrfs_dev_extent *ptr;
10904         struct btrfs_path path;
10905         struct btrfs_key chunk_key;
10906         struct btrfs_key devext_key;
10907         struct btrfs_chunk *chunk;
10908         struct extent_buffer *l;
10909         int num_stripes;
10910         u64 length;
10911         int i;
10912         int found_chunk = 0;
10913         int ret;
10914
10915         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10916         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10917         length = btrfs_dev_extent_length(eb, ptr);
10918
10919         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10920         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10921         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10922
10923         btrfs_init_path(&path);
10924         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10925         if (ret)
10926                 goto out;
10927
10928         l = path.nodes[0];
10929         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10930         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10931                                       chunk_key.offset);
10932         if (ret < 0)
10933                 goto out;
10934
10935         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10936                 goto out;
10937
10938         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10939         for (i = 0; i < num_stripes; i++) {
10940                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10941                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10942
10943                 if (devid == devext_key.objectid &&
10944                     offset == devext_key.offset) {
10945                         found_chunk = 1;
10946                         break;
10947                 }
10948         }
10949 out:
10950         btrfs_release_path(&path);
10951         if (!found_chunk) {
10952                 error(
10953                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10954                         devext_key.objectid, devext_key.offset, length);
10955                 return REFERENCER_MISSING;
10956         }
10957         return 0;
10958 }
10959
10960 /*
10961  * Check if the used space is correct with the dev item
10962  */
10963 static int check_dev_item(struct btrfs_fs_info *fs_info,
10964                           struct extent_buffer *eb, int slot)
10965 {
10966         struct btrfs_root *dev_root = fs_info->dev_root;
10967         struct btrfs_dev_item *dev_item;
10968         struct btrfs_path path;
10969         struct btrfs_key key;
10970         struct btrfs_dev_extent *ptr;
10971         u64 dev_id;
10972         u64 used;
10973         u64 total = 0;
10974         int ret;
10975
10976         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10977         dev_id = btrfs_device_id(eb, dev_item);
10978         used = btrfs_device_bytes_used(eb, dev_item);
10979
10980         key.objectid = dev_id;
10981         key.type = BTRFS_DEV_EXTENT_KEY;
10982         key.offset = 0;
10983
10984         btrfs_init_path(&path);
10985         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10986         if (ret < 0) {
10987                 btrfs_item_key_to_cpu(eb, &key, slot);
10988                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10989                         key.objectid, key.type, key.offset);
10990                 btrfs_release_path(&path);
10991                 return REFERENCER_MISSING;
10992         }
10993
10994         /* Iterate dev_extents to calculate the used space of a device */
10995         while (1) {
10996                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10997                         goto next;
10998
10999                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11000                 if (key.objectid > dev_id)
11001                         break;
11002                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11003                         goto next;
11004
11005                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11006                                      struct btrfs_dev_extent);
11007                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11008 next:
11009                 ret = btrfs_next_item(dev_root, &path);
11010                 if (ret)
11011                         break;
11012         }
11013         btrfs_release_path(&path);
11014
11015         if (used != total) {
11016                 btrfs_item_key_to_cpu(eb, &key, slot);
11017                 error(
11018 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11019                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11020                         BTRFS_DEV_EXTENT_KEY, dev_id);
11021                 return ACCOUNTING_MISMATCH;
11022         }
11023         return 0;
11024 }
11025
11026 /*
11027  * Check a block group item with its referener (chunk) and its used space
11028  * with extent/metadata item
11029  */
11030 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11031                                   struct extent_buffer *eb, int slot)
11032 {
11033         struct btrfs_root *extent_root = fs_info->extent_root;
11034         struct btrfs_root *chunk_root = fs_info->chunk_root;
11035         struct btrfs_block_group_item *bi;
11036         struct btrfs_block_group_item bg_item;
11037         struct btrfs_path path;
11038         struct btrfs_key bg_key;
11039         struct btrfs_key chunk_key;
11040         struct btrfs_key extent_key;
11041         struct btrfs_chunk *chunk;
11042         struct extent_buffer *leaf;
11043         struct btrfs_extent_item *ei;
11044         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11045         u64 flags;
11046         u64 bg_flags;
11047         u64 used;
11048         u64 total = 0;
11049         int ret;
11050         int err = 0;
11051
11052         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11053         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11054         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11055         used = btrfs_block_group_used(&bg_item);
11056         bg_flags = btrfs_block_group_flags(&bg_item);
11057
11058         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11059         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11060         chunk_key.offset = bg_key.objectid;
11061
11062         btrfs_init_path(&path);
11063         /* Search for the referencer chunk */
11064         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11065         if (ret) {
11066                 error(
11067                 "block group[%llu %llu] did not find the related chunk item",
11068                         bg_key.objectid, bg_key.offset);
11069                 err |= REFERENCER_MISSING;
11070         } else {
11071                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11072                                         struct btrfs_chunk);
11073                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11074                                                 bg_key.offset) {
11075                         error(
11076         "block group[%llu %llu] related chunk item length does not match",
11077                                 bg_key.objectid, bg_key.offset);
11078                         err |= REFERENCER_MISMATCH;
11079                 }
11080         }
11081         btrfs_release_path(&path);
11082
11083         /* Search from the block group bytenr */
11084         extent_key.objectid = bg_key.objectid;
11085         extent_key.type = 0;
11086         extent_key.offset = 0;
11087
11088         btrfs_init_path(&path);
11089         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11090         if (ret < 0)
11091                 goto out;
11092
11093         /* Iterate extent tree to account used space */
11094         while (1) {
11095                 leaf = path.nodes[0];
11096
11097                 /* Search slot can point to the last item beyond leaf nritems */
11098                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11099                         goto next;
11100
11101                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11102                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11103                         break;
11104
11105                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11106                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11107                         goto next;
11108                 if (extent_key.objectid < bg_key.objectid)
11109                         goto next;
11110
11111                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11112                         total += nodesize;
11113                 else
11114                         total += extent_key.offset;
11115
11116                 ei = btrfs_item_ptr(leaf, path.slots[0],
11117                                     struct btrfs_extent_item);
11118                 flags = btrfs_extent_flags(leaf, ei);
11119                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11120                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11121                                 error(
11122                         "bad extent[%llu, %llu) type mismatch with chunk",
11123                                         extent_key.objectid,
11124                                         extent_key.objectid + extent_key.offset);
11125                                 err |= CHUNK_TYPE_MISMATCH;
11126                         }
11127                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11128                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11129                                     BTRFS_BLOCK_GROUP_METADATA))) {
11130                                 error(
11131                         "bad extent[%llu, %llu) type mismatch with chunk",
11132                                         extent_key.objectid,
11133                                         extent_key.objectid + nodesize);
11134                                 err |= CHUNK_TYPE_MISMATCH;
11135                         }
11136                 }
11137 next:
11138                 ret = btrfs_next_item(extent_root, &path);
11139                 if (ret)
11140                         break;
11141         }
11142
11143 out:
11144         btrfs_release_path(&path);
11145
11146         if (total != used) {
11147                 error(
11148                 "block group[%llu %llu] used %llu but extent items used %llu",
11149                         bg_key.objectid, bg_key.offset, used, total);
11150                 err |= ACCOUNTING_MISMATCH;
11151         }
11152         return err;
11153 }
11154
11155 /*
11156  * Check a chunk item.
11157  * Including checking all referred dev_extents and block group
11158  */
11159 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11160                             struct extent_buffer *eb, int slot)
11161 {
11162         struct btrfs_root *extent_root = fs_info->extent_root;
11163         struct btrfs_root *dev_root = fs_info->dev_root;
11164         struct btrfs_path path;
11165         struct btrfs_key chunk_key;
11166         struct btrfs_key bg_key;
11167         struct btrfs_key devext_key;
11168         struct btrfs_chunk *chunk;
11169         struct extent_buffer *leaf;
11170         struct btrfs_block_group_item *bi;
11171         struct btrfs_block_group_item bg_item;
11172         struct btrfs_dev_extent *ptr;
11173         u64 length;
11174         u64 chunk_end;
11175         u64 stripe_len;
11176         u64 type;
11177         int num_stripes;
11178         u64 offset;
11179         u64 objectid;
11180         int i;
11181         int ret;
11182         int err = 0;
11183
11184         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11185         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11186         length = btrfs_chunk_length(eb, chunk);
11187         chunk_end = chunk_key.offset + length;
11188         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11189                                       chunk_key.offset);
11190         if (ret < 0) {
11191                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11192                         chunk_end);
11193                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11194                 goto out;
11195         }
11196         type = btrfs_chunk_type(eb, chunk);
11197
11198         bg_key.objectid = chunk_key.offset;
11199         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11200         bg_key.offset = length;
11201
11202         btrfs_init_path(&path);
11203         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11204         if (ret) {
11205                 error(
11206                 "chunk[%llu %llu) did not find the related block group item",
11207                         chunk_key.offset, chunk_end);
11208                 err |= REFERENCER_MISSING;
11209         } else{
11210                 leaf = path.nodes[0];
11211                 bi = btrfs_item_ptr(leaf, path.slots[0],
11212                                     struct btrfs_block_group_item);
11213                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11214                                    sizeof(bg_item));
11215                 if (btrfs_block_group_flags(&bg_item) != type) {
11216                         error(
11217 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11218                                 chunk_key.offset, chunk_end, type,
11219                                 btrfs_block_group_flags(&bg_item));
11220                         err |= REFERENCER_MISSING;
11221                 }
11222         }
11223
11224         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11225         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11226         for (i = 0; i < num_stripes; i++) {
11227                 btrfs_release_path(&path);
11228                 btrfs_init_path(&path);
11229                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11230                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11231                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11232
11233                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11234                                         0, 0);
11235                 if (ret)
11236                         goto not_match_dev;
11237
11238                 leaf = path.nodes[0];
11239                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11240                                      struct btrfs_dev_extent);
11241                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11242                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11243                 if (objectid != chunk_key.objectid ||
11244                     offset != chunk_key.offset ||
11245                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11246                         goto not_match_dev;
11247                 continue;
11248 not_match_dev:
11249                 err |= BACKREF_MISSING;
11250                 error(
11251                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11252                         chunk_key.objectid, chunk_end, i);
11253                 continue;
11254         }
11255         btrfs_release_path(&path);
11256 out:
11257         return err;
11258 }
11259
11260 /*
11261  * Main entry function to check known items and update related accounting info
11262  */
11263 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11264 {
11265         struct btrfs_fs_info *fs_info = root->fs_info;
11266         struct btrfs_key key;
11267         int slot = 0;
11268         int type;
11269         struct btrfs_extent_data_ref *dref;
11270         int ret;
11271         int err = 0;
11272
11273 next:
11274         btrfs_item_key_to_cpu(eb, &key, slot);
11275         type = key.type;
11276
11277         switch (type) {
11278         case BTRFS_EXTENT_DATA_KEY:
11279                 ret = check_extent_data_item(root, eb, slot);
11280                 err |= ret;
11281                 break;
11282         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11283                 ret = check_block_group_item(fs_info, eb, slot);
11284                 err |= ret;
11285                 break;
11286         case BTRFS_DEV_ITEM_KEY:
11287                 ret = check_dev_item(fs_info, eb, slot);
11288                 err |= ret;
11289                 break;
11290         case BTRFS_CHUNK_ITEM_KEY:
11291                 ret = check_chunk_item(fs_info, eb, slot);
11292                 err |= ret;
11293                 break;
11294         case BTRFS_DEV_EXTENT_KEY:
11295                 ret = check_dev_extent_item(fs_info, eb, slot);
11296                 err |= ret;
11297                 break;
11298         case BTRFS_EXTENT_ITEM_KEY:
11299         case BTRFS_METADATA_ITEM_KEY:
11300                 ret = check_extent_item(fs_info, eb, slot);
11301                 err |= ret;
11302                 break;
11303         case BTRFS_EXTENT_CSUM_KEY:
11304                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11305                 break;
11306         case BTRFS_TREE_BLOCK_REF_KEY:
11307                 ret = check_tree_block_backref(fs_info, key.offset,
11308                                                key.objectid, -1);
11309                 err |= ret;
11310                 break;
11311         case BTRFS_EXTENT_DATA_REF_KEY:
11312                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11313                 ret = check_extent_data_backref(fs_info,
11314                                 btrfs_extent_data_ref_root(eb, dref),
11315                                 btrfs_extent_data_ref_objectid(eb, dref),
11316                                 btrfs_extent_data_ref_offset(eb, dref),
11317                                 key.objectid, 0,
11318                                 btrfs_extent_data_ref_count(eb, dref));
11319                 err |= ret;
11320                 break;
11321         case BTRFS_SHARED_BLOCK_REF_KEY:
11322                 ret = check_shared_block_backref(fs_info, key.offset,
11323                                                  key.objectid, -1);
11324                 err |= ret;
11325                 break;
11326         case BTRFS_SHARED_DATA_REF_KEY:
11327                 ret = check_shared_data_backref(fs_info, key.offset,
11328                                                 key.objectid);
11329                 err |= ret;
11330                 break;
11331         default:
11332                 break;
11333         }
11334
11335         if (++slot < btrfs_header_nritems(eb))
11336                 goto next;
11337
11338         return err;
11339 }
11340
11341 /*
11342  * Helper function for later fs/subvol tree check.  To determine if a tree
11343  * block should be checked.
11344  * This function will ensure only the direct referencer with lowest rootid to
11345  * check a fs/subvolume tree block.
11346  *
11347  * Backref check at extent tree would detect errors like missing subvolume
11348  * tree, so we can do aggressive check to reduce duplicated checks.
11349  */
11350 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11351 {
11352         struct btrfs_root *extent_root = root->fs_info->extent_root;
11353         struct btrfs_key key;
11354         struct btrfs_path path;
11355         struct extent_buffer *leaf;
11356         int slot;
11357         struct btrfs_extent_item *ei;
11358         unsigned long ptr;
11359         unsigned long end;
11360         int type;
11361         u32 item_size;
11362         u64 offset;
11363         struct btrfs_extent_inline_ref *iref;
11364         int ret;
11365
11366         btrfs_init_path(&path);
11367         key.objectid = btrfs_header_bytenr(eb);
11368         key.type = BTRFS_METADATA_ITEM_KEY;
11369         key.offset = (u64)-1;
11370
11371         /*
11372          * Any failure in backref resolving means we can't determine
11373          * whom the tree block belongs to.
11374          * So in that case, we need to check that tree block
11375          */
11376         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11377         if (ret < 0)
11378                 goto need_check;
11379
11380         ret = btrfs_previous_extent_item(extent_root, &path,
11381                                          btrfs_header_bytenr(eb));
11382         if (ret)
11383                 goto need_check;
11384
11385         leaf = path.nodes[0];
11386         slot = path.slots[0];
11387         btrfs_item_key_to_cpu(leaf, &key, slot);
11388         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11389
11390         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11391                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11392         } else {
11393                 struct btrfs_tree_block_info *info;
11394
11395                 info = (struct btrfs_tree_block_info *)(ei + 1);
11396                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11397         }
11398
11399         item_size = btrfs_item_size_nr(leaf, slot);
11400         ptr = (unsigned long)iref;
11401         end = (unsigned long)ei + item_size;
11402         while (ptr < end) {
11403                 iref = (struct btrfs_extent_inline_ref *)ptr;
11404                 type = btrfs_extent_inline_ref_type(leaf, iref);
11405                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11406
11407                 /*
11408                  * We only check the tree block if current root is
11409                  * the lowest referencer of it.
11410                  */
11411                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11412                     offset < root->objectid) {
11413                         btrfs_release_path(&path);
11414                         return 0;
11415                 }
11416
11417                 ptr += btrfs_extent_inline_ref_size(type);
11418         }
11419         /*
11420          * Normally we should also check keyed tree block ref, but that may be
11421          * very time consuming.  Inlined ref should already make us skip a lot
11422          * of refs now.  So skip search keyed tree block ref.
11423          */
11424
11425 need_check:
11426         btrfs_release_path(&path);
11427         return 1;
11428 }
11429
11430 /*
11431  * Traversal function for tree block. We will do:
11432  * 1) Skip shared fs/subvolume tree blocks
11433  * 2) Update related bytes accounting
11434  * 3) Pre-order traversal
11435  */
11436 static int traverse_tree_block(struct btrfs_root *root,
11437                                 struct extent_buffer *node)
11438 {
11439         struct extent_buffer *eb;
11440         struct btrfs_key key;
11441         struct btrfs_key drop_key;
11442         int level;
11443         u64 nr;
11444         int i;
11445         int err = 0;
11446         int ret;
11447
11448         /*
11449          * Skip shared fs/subvolume tree block, in that case they will
11450          * be checked by referencer with lowest rootid
11451          */
11452         if (is_fstree(root->objectid) && !should_check(root, node))
11453                 return 0;
11454
11455         /* Update bytes accounting */
11456         total_btree_bytes += node->len;
11457         if (fs_root_objectid(btrfs_header_owner(node)))
11458                 total_fs_tree_bytes += node->len;
11459         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11460                 total_extent_tree_bytes += node->len;
11461         if (!found_old_backref &&
11462             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11463             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11464             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11465                 found_old_backref = 1;
11466
11467         /* pre-order tranversal, check itself first */
11468         level = btrfs_header_level(node);
11469         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11470                                    btrfs_header_level(node),
11471                                    btrfs_header_owner(node));
11472         err |= ret;
11473         if (err)
11474                 error(
11475         "check %s failed root %llu bytenr %llu level %d, force continue check",
11476                         level ? "node":"leaf", root->objectid,
11477                         btrfs_header_bytenr(node), btrfs_header_level(node));
11478
11479         if (!level) {
11480                 btree_space_waste += btrfs_leaf_free_space(root, node);
11481                 ret = check_leaf_items(root, node);
11482                 err |= ret;
11483                 return err;
11484         }
11485
11486         nr = btrfs_header_nritems(node);
11487         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11488         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11489                 sizeof(struct btrfs_key_ptr);
11490
11491         /* Then check all its children */
11492         for (i = 0; i < nr; i++) {
11493                 u64 blocknr = btrfs_node_blockptr(node, i);
11494
11495                 btrfs_node_key_to_cpu(node, &key, i);
11496                 if (level == root->root_item.drop_level &&
11497                     is_dropped_key(&key, &drop_key))
11498                         continue;
11499
11500                 /*
11501                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11502                  * to call the function itself.
11503                  */
11504                 eb = read_tree_block(root->fs_info, blocknr,
11505                                 root->fs_info->nodesize, 0);
11506                 if (extent_buffer_uptodate(eb)) {
11507                         ret = traverse_tree_block(root, eb);
11508                         err |= ret;
11509                 }
11510                 free_extent_buffer(eb);
11511         }
11512
11513         return err;
11514 }
11515
11516 /*
11517  * Low memory usage version check_chunks_and_extents.
11518  */
11519 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11520 {
11521         struct btrfs_path path;
11522         struct btrfs_key key;
11523         struct btrfs_root *root1;
11524         struct btrfs_root *cur_root;
11525         int err = 0;
11526         int ret;
11527
11528         root1 = root->fs_info->chunk_root;
11529         ret = traverse_tree_block(root1, root1->node);
11530         err |= ret;
11531
11532         root1 = root->fs_info->tree_root;
11533         ret = traverse_tree_block(root1, root1->node);
11534         err |= ret;
11535
11536         btrfs_init_path(&path);
11537         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11538         key.offset = 0;
11539         key.type = BTRFS_ROOT_ITEM_KEY;
11540
11541         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11542         if (ret) {
11543                 error("cannot find extent treet in tree_root");
11544                 goto out;
11545         }
11546
11547         while (1) {
11548                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11549                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11550                         goto next;
11551                 key.offset = (u64)-1;
11552
11553                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11554                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11555                                         &key);
11556                 else
11557                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11558                 if (IS_ERR(cur_root) || !cur_root) {
11559                         error("failed to read tree: %lld", key.objectid);
11560                         goto next;
11561                 }
11562
11563                 ret = traverse_tree_block(cur_root, cur_root->node);
11564                 err |= ret;
11565
11566                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11567                         btrfs_free_fs_root(cur_root);
11568 next:
11569                 ret = btrfs_next_item(root1, &path);
11570                 if (ret)
11571                         goto out;
11572         }
11573
11574 out:
11575         btrfs_release_path(&path);
11576         return err;
11577 }
11578
11579 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11580                            struct btrfs_root *root, int overwrite)
11581 {
11582         struct extent_buffer *c;
11583         struct extent_buffer *old = root->node;
11584         int level;
11585         int ret;
11586         struct btrfs_disk_key disk_key = {0,0,0};
11587
11588         level = 0;
11589
11590         if (overwrite) {
11591                 c = old;
11592                 extent_buffer_get(c);
11593                 goto init;
11594         }
11595         c = btrfs_alloc_free_block(trans, root,
11596                                    root->fs_info->nodesize,
11597                                    root->root_key.objectid,
11598                                    &disk_key, level, 0, 0);
11599         if (IS_ERR(c)) {
11600                 c = old;
11601                 extent_buffer_get(c);
11602                 overwrite = 1;
11603         }
11604 init:
11605         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11606         btrfs_set_header_level(c, level);
11607         btrfs_set_header_bytenr(c, c->start);
11608         btrfs_set_header_generation(c, trans->transid);
11609         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11610         btrfs_set_header_owner(c, root->root_key.objectid);
11611
11612         write_extent_buffer(c, root->fs_info->fsid,
11613                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11614
11615         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11616                             btrfs_header_chunk_tree_uuid(c),
11617                             BTRFS_UUID_SIZE);
11618
11619         btrfs_mark_buffer_dirty(c);
11620         /*
11621          * this case can happen in the following case:
11622          *
11623          * 1.overwrite previous root.
11624          *
11625          * 2.reinit reloc data root, this is because we skip pin
11626          * down reloc data tree before which means we can allocate
11627          * same block bytenr here.
11628          */
11629         if (old->start == c->start) {
11630                 btrfs_set_root_generation(&root->root_item,
11631                                           trans->transid);
11632                 root->root_item.level = btrfs_header_level(root->node);
11633                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11634                                         &root->root_key, &root->root_item);
11635                 if (ret) {
11636                         free_extent_buffer(c);
11637                         return ret;
11638                 }
11639         }
11640         free_extent_buffer(old);
11641         root->node = c;
11642         add_root_to_dirty_list(root);
11643         return 0;
11644 }
11645
11646 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11647                                 struct extent_buffer *eb, int tree_root)
11648 {
11649         struct extent_buffer *tmp;
11650         struct btrfs_root_item *ri;
11651         struct btrfs_key key;
11652         u64 bytenr;
11653         u32 nodesize;
11654         int level = btrfs_header_level(eb);
11655         int nritems;
11656         int ret;
11657         int i;
11658
11659         /*
11660          * If we have pinned this block before, don't pin it again.
11661          * This can not only avoid forever loop with broken filesystem
11662          * but also give us some speedups.
11663          */
11664         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11665                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11666                 return 0;
11667
11668         btrfs_pin_extent(fs_info, eb->start, eb->len);
11669
11670         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11671         nritems = btrfs_header_nritems(eb);
11672         for (i = 0; i < nritems; i++) {
11673                 if (level == 0) {
11674                         btrfs_item_key_to_cpu(eb, &key, i);
11675                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11676                                 continue;
11677                         /* Skip the extent root and reloc roots */
11678                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11679                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11680                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11681                                 continue;
11682                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11683                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11684
11685                         /*
11686                          * If at any point we start needing the real root we
11687                          * will have to build a stump root for the root we are
11688                          * in, but for now this doesn't actually use the root so
11689                          * just pass in extent_root.
11690                          */
11691                         tmp = read_tree_block(fs_info, bytenr, nodesize, 0);
11692                         if (!extent_buffer_uptodate(tmp)) {
11693                                 fprintf(stderr, "Error reading root block\n");
11694                                 return -EIO;
11695                         }
11696                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11697                         free_extent_buffer(tmp);
11698                         if (ret)
11699                                 return ret;
11700                 } else {
11701                         bytenr = btrfs_node_blockptr(eb, i);
11702
11703                         /* If we aren't the tree root don't read the block */
11704                         if (level == 1 && !tree_root) {
11705                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11706                                 continue;
11707                         }
11708
11709                         tmp = read_tree_block(fs_info, bytenr,
11710                                               nodesize, 0);
11711                         if (!extent_buffer_uptodate(tmp)) {
11712                                 fprintf(stderr, "Error reading tree block\n");
11713                                 return -EIO;
11714                         }
11715                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11716                         free_extent_buffer(tmp);
11717                         if (ret)
11718                                 return ret;
11719                 }
11720         }
11721
11722         return 0;
11723 }
11724
11725 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11726 {
11727         int ret;
11728
11729         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11730         if (ret)
11731                 return ret;
11732
11733         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11734 }
11735
11736 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11737 {
11738         struct btrfs_block_group_cache *cache;
11739         struct btrfs_path path;
11740         struct extent_buffer *leaf;
11741         struct btrfs_chunk *chunk;
11742         struct btrfs_key key;
11743         int ret;
11744         u64 start;
11745
11746         btrfs_init_path(&path);
11747         key.objectid = 0;
11748         key.type = BTRFS_CHUNK_ITEM_KEY;
11749         key.offset = 0;
11750         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11751         if (ret < 0) {
11752                 btrfs_release_path(&path);
11753                 return ret;
11754         }
11755
11756         /*
11757          * We do this in case the block groups were screwed up and had alloc
11758          * bits that aren't actually set on the chunks.  This happens with
11759          * restored images every time and could happen in real life I guess.
11760          */
11761         fs_info->avail_data_alloc_bits = 0;
11762         fs_info->avail_metadata_alloc_bits = 0;
11763         fs_info->avail_system_alloc_bits = 0;
11764
11765         /* First we need to create the in-memory block groups */
11766         while (1) {
11767                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11768                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11769                         if (ret < 0) {
11770                                 btrfs_release_path(&path);
11771                                 return ret;
11772                         }
11773                         if (ret) {
11774                                 ret = 0;
11775                                 break;
11776                         }
11777                 }
11778                 leaf = path.nodes[0];
11779                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11780                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11781                         path.slots[0]++;
11782                         continue;
11783                 }
11784
11785                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11786                 btrfs_add_block_group(fs_info, 0,
11787                                       btrfs_chunk_type(leaf, chunk),
11788                                       key.objectid, key.offset,
11789                                       btrfs_chunk_length(leaf, chunk));
11790                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11791                                  key.offset + btrfs_chunk_length(leaf, chunk));
11792                 path.slots[0]++;
11793         }
11794         start = 0;
11795         while (1) {
11796                 cache = btrfs_lookup_first_block_group(fs_info, start);
11797                 if (!cache)
11798                         break;
11799                 cache->cached = 1;
11800                 start = cache->key.objectid + cache->key.offset;
11801         }
11802
11803         btrfs_release_path(&path);
11804         return 0;
11805 }
11806
/*
 * Drop a pending balance from the tree root and reinitialize the data reloc
 * tree.
 *
 * If a balance item exists it is deleted, and after that every item whose
 * objectid is BTRFS_TREE_RELOC_OBJECTID is deleted from the tree root as
 * well.  If no balance item exists that part is skipped.  In both cases the
 * data reloc tree is then given an empty root node and a new root directory.
 *
 * Returns 0 on success, or the error of the first step that failed.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *root = fs_info->tree_root;
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        /* del_slot is only meaningful while del_nr is non-zero */
        int del_slot, del_nr = 0;
        int ret;
        int found = 0;

        btrfs_init_path(&path);
        key.objectid = BTRFS_BALANCE_OBJECTID;
        key.type = BTRFS_BALANCE_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret) {
                /* ret > 0: no balance item, nothing to delete here */
                if (ret > 0)
                        ret = 0;
                if (!ret)
                        goto reinit_data_reloc;
                else
                        goto out;
        }

        ret = btrfs_del_item(trans, root, &path);
        if (ret)
                goto out;
        btrfs_release_path(&path);

        /* Now remove every item keyed with BTRFS_TREE_RELOC_OBJECTID */
        key.objectid = BTRFS_TREE_RELOC_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret < 0)
                goto out;
        while (1) {
                if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                        /*
                         * Ran off the end of the leaf.  If no item was looked
                         * at since the last search there is nothing left.
                         */
                        if (!found)
                                break;

                        /* Flush the run of slots collected in this leaf */
                        if (del_nr) {
                                ret = btrfs_del_items(trans, root, &path,
                                                      del_slot, del_nr);
                                del_nr = 0;
                                if (ret)
                                        goto out;
                        }
                        /* Search again right behind the last key we looked at */
                        key.offset++;
                        btrfs_release_path(&path);

                        found = 0;
                        ret = btrfs_search_slot(trans, root, &key, &path,
                                                -1, 1);
                        if (ret < 0)
                                goto out;
                        continue;
                }
                found = 1;
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                /* Past the last possible reloc tree item, stop scanning */
                if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
                        break;
                if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                        path.slots[0]++;
                        continue;
                }
                /* Start a new run of slots to delete, or extend the current one */
                if (!del_nr) {
                        del_slot = path.slots[0];
                        del_nr = 1;
                } else {
                        del_nr++;
                }
                path.slots[0]++;
        }

        /* Delete whatever run is still pending after leaving the loop */
        if (del_nr) {
                ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
                if (ret)
                        goto out;
        }
        btrfs_release_path(&path);

reinit_data_reloc:
        /* From here on "root" refers to the data reloc tree, not the tree root */
        key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = (u64)-1;
        root = btrfs_read_fs_root(fs_info, &key);
        if (IS_ERR(root)) {
                fprintf(stderr, "Error reading data reloc tree\n");
                ret = PTR_ERR(root);
                goto out;
        }
        record_root_in_trans(trans, root);
        ret = btrfs_fsck_reinit_root(trans, root, 0);
        if (ret)
                goto out;
        ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
        btrfs_release_path(&path);
        return ret;
}
11909
11910 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11911                               struct btrfs_fs_info *fs_info)
11912 {
11913         u64 start = 0;
11914         int ret;
11915
11916         /*
11917          * The only reason we don't do this is because right now we're just
11918          * walking the trees we find and pinning down their bytes, we don't look
11919          * at any of the leaves.  In order to do mixed groups we'd have to check
11920          * the leaves of any fs roots and pin down the bytes for any file
11921          * extents we find.  Not hard but why do it if we don't have to?
11922          */
11923         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11924                 fprintf(stderr, "We don't support re-initing the extent tree "
11925                         "for mixed block groups yet, please notify a btrfs "
11926                         "developer you want to do this so they can add this "
11927                         "functionality.\n");
11928                 return -EINVAL;
11929         }
11930
11931         /*
11932          * first we need to walk all of the trees except the extent tree and pin
11933          * down the bytes that are in use so we don't overwrite any existing
11934          * metadata.
11935          */
11936         ret = pin_metadata_blocks(fs_info);
11937         if (ret) {
11938                 fprintf(stderr, "error pinning down used bytes\n");
11939                 return ret;
11940         }
11941
11942         /*
11943          * Need to drop all the block groups since we're going to recreate all
11944          * of them again.
11945          */
11946         btrfs_free_block_groups(fs_info);
11947         ret = reset_block_groups(fs_info);
11948         if (ret) {
11949                 fprintf(stderr, "error resetting the block groups\n");
11950                 return ret;
11951         }
11952
11953         /* Ok we can allocate now, reinit the extent root */
11954         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11955         if (ret) {
11956                 fprintf(stderr, "extent root initialization failed\n");
11957                 /*
11958                  * When the transaction code is updated we should end the
11959                  * transaction, but for now progs only knows about commit so
11960                  * just return an error.
11961                  */
11962                 return ret;
11963         }
11964
11965         /*
11966          * Now we have all the in-memory block groups setup so we can make
11967          * allocations properly, and the metadata we care about is safe since we
11968          * pinned all of it above.
11969          */
11970         while (1) {
11971                 struct btrfs_block_group_cache *cache;
11972
11973                 cache = btrfs_lookup_first_block_group(fs_info, start);
11974                 if (!cache)
11975                         break;
11976                 start = cache->key.objectid + cache->key.offset;
11977                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11978                                         &cache->key, &cache->item,
11979                                         sizeof(cache->item));
11980                 if (ret) {
11981                         fprintf(stderr, "Error adding block group\n");
11982                         return ret;
11983                 }
11984                 btrfs_extent_post_op(trans, fs_info->extent_root);
11985         }
11986
11987         ret = reset_balance(trans, fs_info);
11988         if (ret)
11989                 fprintf(stderr, "error resetting the pending balance\n");
11990
11991         return ret;
11992 }
11993
11994 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11995 {
11996         struct btrfs_path path;
11997         struct btrfs_trans_handle *trans;
11998         struct btrfs_key key;
11999         int ret;
12000
12001         printf("Recowing metadata block %llu\n", eb->start);
12002         key.objectid = btrfs_header_owner(eb);
12003         key.type = BTRFS_ROOT_ITEM_KEY;
12004         key.offset = (u64)-1;
12005
12006         root = btrfs_read_fs_root(root->fs_info, &key);
12007         if (IS_ERR(root)) {
12008                 fprintf(stderr, "Couldn't find owner root %llu\n",
12009                         key.objectid);
12010                 return PTR_ERR(root);
12011         }
12012
12013         trans = btrfs_start_transaction(root, 1);
12014         if (IS_ERR(trans))
12015                 return PTR_ERR(trans);
12016
12017         btrfs_init_path(&path);
12018         path.lowest_level = btrfs_header_level(eb);
12019         if (path.lowest_level)
12020                 btrfs_node_key_to_cpu(eb, &key, 0);
12021         else
12022                 btrfs_item_key_to_cpu(eb, &key, 0);
12023
12024         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12025         btrfs_commit_transaction(trans, root);
12026         btrfs_release_path(&path);
12027         return ret;
12028 }
12029
12030 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12031 {
12032         struct btrfs_path path;
12033         struct btrfs_trans_handle *trans;
12034         struct btrfs_key key;
12035         int ret;
12036
12037         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12038                bad->key.type, bad->key.offset);
12039         key.objectid = bad->root_id;
12040         key.type = BTRFS_ROOT_ITEM_KEY;
12041         key.offset = (u64)-1;
12042
12043         root = btrfs_read_fs_root(root->fs_info, &key);
12044         if (IS_ERR(root)) {
12045                 fprintf(stderr, "Couldn't find owner root %llu\n",
12046                         key.objectid);
12047                 return PTR_ERR(root);
12048         }
12049
12050         trans = btrfs_start_transaction(root, 1);
12051         if (IS_ERR(trans))
12052                 return PTR_ERR(trans);
12053
12054         btrfs_init_path(&path);
12055         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12056         if (ret) {
12057                 if (ret > 0)
12058                         ret = 0;
12059                 goto out;
12060         }
12061         ret = btrfs_del_item(trans, root, &path);
12062 out:
12063         btrfs_commit_transaction(trans, root);
12064         btrfs_release_path(&path);
12065         return ret;
12066 }
12067
12068 static int zero_log_tree(struct btrfs_root *root)
12069 {
12070         struct btrfs_trans_handle *trans;
12071         int ret;
12072
12073         trans = btrfs_start_transaction(root, 1);
12074         if (IS_ERR(trans)) {
12075                 ret = PTR_ERR(trans);
12076                 return ret;
12077         }
12078         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12079         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12080         ret = btrfs_commit_transaction(trans, root);
12081         return ret;
12082 }
12083
12084 static int populate_csum(struct btrfs_trans_handle *trans,
12085                          struct btrfs_root *csum_root, char *buf, u64 start,
12086                          u64 len)
12087 {
12088         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12089         u64 offset = 0;
12090         u64 sectorsize;
12091         int ret = 0;
12092
12093         while (offset < len) {
12094                 sectorsize = fs_info->sectorsize;
12095                 ret = read_extent_data(fs_info, buf, start + offset,
12096                                        &sectorsize, 0);
12097                 if (ret)
12098                         break;
12099                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12100                                             start + offset, buf, sectorsize);
12101                 if (ret)
12102                         break;
12103                 offset += sectorsize;
12104         }
12105         return ret;
12106 }
12107
12108 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12109                                       struct btrfs_root *csum_root,
12110                                       struct btrfs_root *cur_root)
12111 {
12112         struct btrfs_path path;
12113         struct btrfs_key key;
12114         struct extent_buffer *node;
12115         struct btrfs_file_extent_item *fi;
12116         char *buf = NULL;
12117         u64 start = 0;
12118         u64 len = 0;
12119         int slot = 0;
12120         int ret = 0;
12121
12122         buf = malloc(cur_root->fs_info->sectorsize);
12123         if (!buf)
12124                 return -ENOMEM;
12125
12126         btrfs_init_path(&path);
12127         key.objectid = 0;
12128         key.offset = 0;
12129         key.type = 0;
12130         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12131         if (ret < 0)
12132                 goto out;
12133         /* Iterate all regular file extents and fill its csum */
12134         while (1) {
12135                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12136
12137                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12138                         goto next;
12139                 node = path.nodes[0];
12140                 slot = path.slots[0];
12141                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12142                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12143                         goto next;
12144                 start = btrfs_file_extent_disk_bytenr(node, fi);
12145                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12146
12147                 ret = populate_csum(trans, csum_root, buf, start, len);
12148                 if (ret == -EEXIST)
12149                         ret = 0;
12150                 if (ret < 0)
12151                         goto out;
12152 next:
12153                 /*
12154                  * TODO: if next leaf is corrupted, jump to nearest next valid
12155                  * leaf.
12156                  */
12157                 ret = btrfs_next_item(cur_root, &path);
12158                 if (ret < 0)
12159                         goto out;
12160                 if (ret > 0) {
12161                         ret = 0;
12162                         goto out;
12163                 }
12164         }
12165
12166 out:
12167         btrfs_release_path(&path);
12168         free(buf);
12169         return ret;
12170 }
12171
12172 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12173                                   struct btrfs_root *csum_root)
12174 {
12175         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12176         struct btrfs_path path;
12177         struct btrfs_root *tree_root = fs_info->tree_root;
12178         struct btrfs_root *cur_root;
12179         struct extent_buffer *node;
12180         struct btrfs_key key;
12181         int slot = 0;
12182         int ret = 0;
12183
12184         btrfs_init_path(&path);
12185         key.objectid = BTRFS_FS_TREE_OBJECTID;
12186         key.offset = 0;
12187         key.type = BTRFS_ROOT_ITEM_KEY;
12188         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12189         if (ret < 0)
12190                 goto out;
12191         if (ret > 0) {
12192                 ret = -ENOENT;
12193                 goto out;
12194         }
12195
12196         while (1) {
12197                 node = path.nodes[0];
12198                 slot = path.slots[0];
12199                 btrfs_item_key_to_cpu(node, &key, slot);
12200                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12201                         goto out;
12202                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12203                         goto next;
12204                 if (!is_fstree(key.objectid))
12205                         goto next;
12206                 key.offset = (u64)-1;
12207
12208                 cur_root = btrfs_read_fs_root(fs_info, &key);
12209                 if (IS_ERR(cur_root) || !cur_root) {
12210                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12211                                 key.objectid);
12212                         goto out;
12213                 }
12214                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12215                                 cur_root);
12216                 if (ret < 0)
12217                         goto out;
12218 next:
12219                 ret = btrfs_next_item(tree_root, &path);
12220                 if (ret > 0) {
12221                         ret = 0;
12222                         goto out;
12223                 }
12224                 if (ret < 0)
12225                         goto out;
12226         }
12227
12228 out:
12229         btrfs_release_path(&path);
12230         return ret;
12231 }
12232
12233 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12234                                       struct btrfs_root *csum_root)
12235 {
12236         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12237         struct btrfs_path path;
12238         struct btrfs_extent_item *ei;
12239         struct extent_buffer *leaf;
12240         char *buf;
12241         struct btrfs_key key;
12242         int ret;
12243
12244         btrfs_init_path(&path);
12245         key.objectid = 0;
12246         key.type = BTRFS_EXTENT_ITEM_KEY;
12247         key.offset = 0;
12248         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12249         if (ret < 0) {
12250                 btrfs_release_path(&path);
12251                 return ret;
12252         }
12253
12254         buf = malloc(csum_root->fs_info->sectorsize);
12255         if (!buf) {
12256                 btrfs_release_path(&path);
12257                 return -ENOMEM;
12258         }
12259
12260         while (1) {
12261                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12262                         ret = btrfs_next_leaf(extent_root, &path);
12263                         if (ret < 0)
12264                                 break;
12265                         if (ret) {
12266                                 ret = 0;
12267                                 break;
12268                         }
12269                 }
12270                 leaf = path.nodes[0];
12271
12272                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12273                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12274                         path.slots[0]++;
12275                         continue;
12276                 }
12277
12278                 ei = btrfs_item_ptr(leaf, path.slots[0],
12279                                     struct btrfs_extent_item);
12280                 if (!(btrfs_extent_flags(leaf, ei) &
12281                       BTRFS_EXTENT_FLAG_DATA)) {
12282                         path.slots[0]++;
12283                         continue;
12284                 }
12285
12286                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12287                                     key.offset);
12288                 if (ret)
12289                         break;
12290                 path.slots[0]++;
12291         }
12292
12293         btrfs_release_path(&path);
12294         free(buf);
12295         return ret;
12296 }
12297
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * After an extent tree init the extent info has been wiped, so the extent
 * tree cannot be used as the source of data extents; in that case the caller
 * sets @search_fs_tree and the fs/subvolume trees are walked instead.
 * Otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12314
12315 static void free_roots_info_cache(void)
12316 {
12317         if (!roots_info_cache)
12318                 return;
12319
12320         while (!cache_tree_empty(roots_info_cache)) {
12321                 struct cache_extent *entry;
12322                 struct root_item_info *rii;
12323
12324                 entry = first_cache_extent(roots_info_cache);
12325                 if (!entry)
12326                         break;
12327                 remove_cache_extent(roots_info_cache, entry);
12328                 rii = container_of(entry, struct root_item_info, cache_extent);
12329                 free(rii);
12330         }
12331
12332         free(roots_info_cache);
12333         roots_info_cache = NULL;
12334 }
12335
12336 static int build_roots_info_cache(struct btrfs_fs_info *info)
12337 {
12338         int ret = 0;
12339         struct btrfs_key key;
12340         struct extent_buffer *leaf;
12341         struct btrfs_path path;
12342
12343         if (!roots_info_cache) {
12344                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12345                 if (!roots_info_cache)
12346                         return -ENOMEM;
12347                 cache_tree_init(roots_info_cache);
12348         }
12349
12350         btrfs_init_path(&path);
12351         key.objectid = 0;
12352         key.type = BTRFS_EXTENT_ITEM_KEY;
12353         key.offset = 0;
12354         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12355         if (ret < 0)
12356                 goto out;
12357         leaf = path.nodes[0];
12358
12359         while (1) {
12360                 struct btrfs_key found_key;
12361                 struct btrfs_extent_item *ei;
12362                 struct btrfs_extent_inline_ref *iref;
12363                 int slot = path.slots[0];
12364                 int type;
12365                 u64 flags;
12366                 u64 root_id;
12367                 u8 level;
12368                 struct cache_extent *entry;
12369                 struct root_item_info *rii;
12370
12371                 if (slot >= btrfs_header_nritems(leaf)) {
12372                         ret = btrfs_next_leaf(info->extent_root, &path);
12373                         if (ret < 0) {
12374                                 break;
12375                         } else if (ret) {
12376                                 ret = 0;
12377                                 break;
12378                         }
12379                         leaf = path.nodes[0];
12380                         slot = path.slots[0];
12381                 }
12382
12383                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12384
12385                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12386                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12387                         goto next;
12388
12389                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12390                 flags = btrfs_extent_flags(leaf, ei);
12391
12392                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12393                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12394                         goto next;
12395
12396                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12397                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12398                         level = found_key.offset;
12399                 } else {
12400                         struct btrfs_tree_block_info *binfo;
12401
12402                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12403                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12404                         level = btrfs_tree_block_level(leaf, binfo);
12405                 }
12406
12407                 /*
12408                  * For a root extent, it must be of the following type and the
12409                  * first (and only one) iref in the item.
12410                  */
12411                 type = btrfs_extent_inline_ref_type(leaf, iref);
12412                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12413                         goto next;
12414
12415                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12416                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12417                 if (!entry) {
12418                         rii = malloc(sizeof(struct root_item_info));
12419                         if (!rii) {
12420                                 ret = -ENOMEM;
12421                                 goto out;
12422                         }
12423                         rii->cache_extent.start = root_id;
12424                         rii->cache_extent.size = 1;
12425                         rii->level = (u8)-1;
12426                         entry = &rii->cache_extent;
12427                         ret = insert_cache_extent(roots_info_cache, entry);
12428                         ASSERT(ret == 0);
12429                 } else {
12430                         rii = container_of(entry, struct root_item_info,
12431                                            cache_extent);
12432                 }
12433
12434                 ASSERT(rii->cache_extent.start == root_id);
12435                 ASSERT(rii->cache_extent.size == 1);
12436
12437                 if (level > rii->level || rii->level == (u8)-1) {
12438                         rii->level = level;
12439                         rii->bytenr = found_key.objectid;
12440                         rii->gen = btrfs_extent_generation(leaf, ei);
12441                         rii->node_count = 1;
12442                 } else if (level == rii->level) {
12443                         rii->node_count++;
12444                 }
12445 next:
12446                 path.slots[0]++;
12447         }
12448
12449 out:
12450         btrfs_release_path(&path);
12451
12452         return ret;
12453 }
12454
12455 static int maybe_repair_root_item(struct btrfs_path *path,
12456                                   const struct btrfs_key *root_key,
12457                                   const int read_only_mode)
12458 {
12459         const u64 root_id = root_key->objectid;
12460         struct cache_extent *entry;
12461         struct root_item_info *rii;
12462         struct btrfs_root_item ri;
12463         unsigned long offset;
12464
12465         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12466         if (!entry) {
12467                 fprintf(stderr,
12468                         "Error: could not find extent items for root %llu\n",
12469                         root_key->objectid);
12470                 return -ENOENT;
12471         }
12472
12473         rii = container_of(entry, struct root_item_info, cache_extent);
12474         ASSERT(rii->cache_extent.start == root_id);
12475         ASSERT(rii->cache_extent.size == 1);
12476
12477         if (rii->node_count != 1) {
12478                 fprintf(stderr,
12479                         "Error: could not find btree root extent for root %llu\n",
12480                         root_id);
12481                 return -ENOENT;
12482         }
12483
12484         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12485         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12486
12487         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12488             btrfs_root_level(&ri) != rii->level ||
12489             btrfs_root_generation(&ri) != rii->gen) {
12490
12491                 /*
12492                  * If we're in repair mode but our caller told us to not update
12493                  * the root item, i.e. just check if it needs to be updated, don't
12494                  * print this message, since the caller will call us again shortly
12495                  * for the same root item without read only mode (the caller will
12496                  * open a transaction first).
12497                  */
12498                 if (!(read_only_mode && repair))
12499                         fprintf(stderr,
12500                                 "%sroot item for root %llu,"
12501                                 " current bytenr %llu, current gen %llu, current level %u,"
12502                                 " new bytenr %llu, new gen %llu, new level %u\n",
12503                                 (read_only_mode ? "" : "fixing "),
12504                                 root_id,
12505                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12506                                 btrfs_root_level(&ri),
12507                                 rii->bytenr, rii->gen, rii->level);
12508
12509                 if (btrfs_root_generation(&ri) > rii->gen) {
12510                         fprintf(stderr,
12511                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12512                                 root_id, btrfs_root_generation(&ri), rii->gen);
12513                         return -EINVAL;
12514                 }
12515
12516                 if (!read_only_mode) {
12517                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12518                         btrfs_set_root_level(&ri, rii->level);
12519                         btrfs_set_root_generation(&ri, rii->gen);
12520                         write_extent_buffer(path->nodes[0], &ri,
12521                                             offset, sizeof(ri));
12522                 }
12523
12524                 return 1;
12525         }
12526
12527         return 0;
12528 }
12529
12530 /*
12531  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12532  * caused read-only snapshots to be corrupted if they were created at a moment
12533  * when the source subvolume/snapshot had orphan items. The issue was that the
12534  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12535  * node instead of the post orphan cleanup root node.
12536  * So this function, and its callees, just detects and fixes those cases. Even
12537  * though the regression was for read-only snapshots, this function applies to
12538  * any snapshot/subvolume root.
12539  * This must be run before any other repair code - not doing it so, makes other
12540  * repair code delete or modify backrefs in the extent tree for example, which
12541  * will result in an inconsistent fs after repairing the root items.
12542  */
12543 static int repair_root_items(struct btrfs_fs_info *info)
12544 {
12545         struct btrfs_path path;
12546         struct btrfs_key key;
12547         struct extent_buffer *leaf;
12548         struct btrfs_trans_handle *trans = NULL;
12549         int ret = 0;
12550         int bad_roots = 0;
12551         int need_trans = 0;
12552
12553         btrfs_init_path(&path);
12554
12555         ret = build_roots_info_cache(info);
12556         if (ret)
12557                 goto out;
12558
12559         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12560         key.type = BTRFS_ROOT_ITEM_KEY;
12561         key.offset = 0;
12562
12563 again:
12564         /*
12565          * Avoid opening and committing transactions if a leaf doesn't have
12566          * any root items that need to be fixed, so that we avoid rotating
12567          * backup roots unnecessarily.
12568          */
12569         if (need_trans) {
12570                 trans = btrfs_start_transaction(info->tree_root, 1);
12571                 if (IS_ERR(trans)) {
12572                         ret = PTR_ERR(trans);
12573                         goto out;
12574                 }
12575         }
12576
12577         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12578                                 0, trans ? 1 : 0);
12579         if (ret < 0)
12580                 goto out;
12581         leaf = path.nodes[0];
12582
12583         while (1) {
12584                 struct btrfs_key found_key;
12585
12586                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12587                         int no_more_keys = find_next_key(&path, &key);
12588
12589                         btrfs_release_path(&path);
12590                         if (trans) {
12591                                 ret = btrfs_commit_transaction(trans,
12592                                                                info->tree_root);
12593                                 trans = NULL;
12594                                 if (ret < 0)
12595                                         goto out;
12596                         }
12597                         need_trans = 0;
12598                         if (no_more_keys)
12599                                 break;
12600                         goto again;
12601                 }
12602
12603                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12604
12605                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12606                         goto next;
12607                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12608                         goto next;
12609
12610                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12611                 if (ret < 0)
12612                         goto out;
12613                 if (ret) {
12614                         if (!trans && repair) {
12615                                 need_trans = 1;
12616                                 key = found_key;
12617                                 btrfs_release_path(&path);
12618                                 goto again;
12619                         }
12620                         bad_roots++;
12621                 }
12622 next:
12623                 path.slots[0]++;
12624         }
12625         ret = 0;
12626 out:
12627         free_roots_info_cache();
12628         btrfs_release_path(&path);
12629         if (trans)
12630                 btrfs_commit_transaction(trans, info->tree_root);
12631         if (ret < 0)
12632                 return ret;
12633
12634         return bad_roots;
12635 }
12636
12637 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12638 {
12639         struct btrfs_trans_handle *trans;
12640         struct btrfs_block_group_cache *bg_cache;
12641         u64 current = 0;
12642         int ret = 0;
12643
12644         /* Clear all free space cache inodes and its extent data */
12645         while (1) {
12646                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12647                 if (!bg_cache)
12648                         break;
12649                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12650                 if (ret < 0)
12651                         return ret;
12652                 current = bg_cache->key.objectid + bg_cache->key.offset;
12653         }
12654
12655         /* Don't forget to set cache_generation to -1 */
12656         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12657         if (IS_ERR(trans)) {
12658                 error("failed to update super block cache generation");
12659                 return PTR_ERR(trans);
12660         }
12661         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12662         btrfs_commit_transaction(trans, fs_info->tree_root);
12663
12664         return ret;
12665 }
12666
/*
 * Help text for "btrfs check": a NULL-terminated array with one string per
 * output line.
 *
 * NOTE(review): the layout appears to be synopsis, one-line summary, long
 * description, an empty separator string, then the option list; confirm
 * against the shared usage printer before reordering entries.
 */
const char *const cmd_check_usage[] = {
	/* Synopsis and one-line summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* Long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Options; continuation lines are aligned with the description column */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
12697
12698 int cmd_check(int argc, char **argv)
12699 {
12700         struct cache_tree root_cache;
12701         struct btrfs_root *root;
12702         struct btrfs_fs_info *info;
12703         u64 bytenr = 0;
12704         u64 subvolid = 0;
12705         u64 tree_root_bytenr = 0;
12706         u64 chunk_root_bytenr = 0;
12707         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12708         int ret;
12709         int err = 0;
12710         u64 num;
12711         int init_csum_tree = 0;
12712         int readonly = 0;
12713         int clear_space_cache = 0;
12714         int qgroup_report = 0;
12715         int qgroups_repaired = 0;
12716         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12717
12718         while(1) {
12719                 int c;
12720                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12721                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12722                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12723                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12724                 static const struct option long_options[] = {
12725                         { "super", required_argument, NULL, 's' },
12726                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12727                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12728                         { "init-csum-tree", no_argument, NULL,
12729                                 GETOPT_VAL_INIT_CSUM },
12730                         { "init-extent-tree", no_argument, NULL,
12731                                 GETOPT_VAL_INIT_EXTENT },
12732                         { "check-data-csum", no_argument, NULL,
12733                                 GETOPT_VAL_CHECK_CSUM },
12734                         { "backup", no_argument, NULL, 'b' },
12735                         { "subvol-extents", required_argument, NULL, 'E' },
12736                         { "qgroup-report", no_argument, NULL, 'Q' },
12737                         { "tree-root", required_argument, NULL, 'r' },
12738                         { "chunk-root", required_argument, NULL,
12739                                 GETOPT_VAL_CHUNK_TREE },
12740                         { "progress", no_argument, NULL, 'p' },
12741                         { "mode", required_argument, NULL,
12742                                 GETOPT_VAL_MODE },
12743                         { "clear-space-cache", required_argument, NULL,
12744                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12745                         { NULL, 0, NULL, 0}
12746                 };
12747
12748                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12749                 if (c < 0)
12750                         break;
12751                 switch(c) {
12752                         case 'a': /* ignored */ break;
12753                         case 'b':
12754                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12755                                 break;
12756                         case 's':
12757                                 num = arg_strtou64(optarg);
12758                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12759                                         error(
12760                                         "super mirror should be less than %d",
12761                                                 BTRFS_SUPER_MIRROR_MAX);
12762                                         exit(1);
12763                                 }
12764                                 bytenr = btrfs_sb_offset(((int)num));
12765                                 printf("using SB copy %llu, bytenr %llu\n", num,
12766                                        (unsigned long long)bytenr);
12767                                 break;
12768                         case 'Q':
12769                                 qgroup_report = 1;
12770                                 break;
12771                         case 'E':
12772                                 subvolid = arg_strtou64(optarg);
12773                                 break;
12774                         case 'r':
12775                                 tree_root_bytenr = arg_strtou64(optarg);
12776                                 break;
12777                         case GETOPT_VAL_CHUNK_TREE:
12778                                 chunk_root_bytenr = arg_strtou64(optarg);
12779                                 break;
12780                         case 'p':
12781                                 ctx.progress_enabled = true;
12782                                 break;
12783                         case '?':
12784                         case 'h':
12785                                 usage(cmd_check_usage);
12786                         case GETOPT_VAL_REPAIR:
12787                                 printf("enabling repair mode\n");
12788                                 repair = 1;
12789                                 ctree_flags |= OPEN_CTREE_WRITES;
12790                                 break;
12791                         case GETOPT_VAL_READONLY:
12792                                 readonly = 1;
12793                                 break;
12794                         case GETOPT_VAL_INIT_CSUM:
12795                                 printf("Creating a new CRC tree\n");
12796                                 init_csum_tree = 1;
12797                                 repair = 1;
12798                                 ctree_flags |= OPEN_CTREE_WRITES;
12799                                 break;
12800                         case GETOPT_VAL_INIT_EXTENT:
12801                                 init_extent_tree = 1;
12802                                 ctree_flags |= (OPEN_CTREE_WRITES |
12803                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12804                                 repair = 1;
12805                                 break;
12806                         case GETOPT_VAL_CHECK_CSUM:
12807                                 check_data_csum = 1;
12808                                 break;
12809                         case GETOPT_VAL_MODE:
12810                                 check_mode = parse_check_mode(optarg);
12811                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12812                                         error("unknown mode: %s", optarg);
12813                                         exit(1);
12814                                 }
12815                                 break;
12816                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12817                                 if (strcmp(optarg, "v1") == 0) {
12818                                         clear_space_cache = 1;
12819                                 } else if (strcmp(optarg, "v2") == 0) {
12820                                         clear_space_cache = 2;
12821                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12822                                 } else {
12823                                         error(
12824                 "invalid argument to --clear-space-cache, must be v1 or v2");
12825                                         exit(1);
12826                                 }
12827                                 ctree_flags |= OPEN_CTREE_WRITES;
12828                                 break;
12829                 }
12830         }
12831
12832         if (check_argc_exact(argc - optind, 1))
12833                 usage(cmd_check_usage);
12834
12835         if (ctx.progress_enabled) {
12836                 ctx.tp = TASK_NOTHING;
12837                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12838         }
12839
12840         /* This check is the only reason for --readonly to exist */
12841         if (readonly && repair) {
12842                 error("repair options are not compatible with --readonly");
12843                 exit(1);
12844         }
12845
12846         /*
12847          * Not supported yet
12848          */
12849         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12850                 error("low memory mode doesn't support repair yet");
12851                 exit(1);
12852         }
12853
12854         radix_tree_init();
12855         cache_tree_init(&root_cache);
12856
12857         if((ret = check_mounted(argv[optind])) < 0) {
12858                 error("could not check mount status: %s", strerror(-ret));
12859                 err |= !!ret;
12860                 goto err_out;
12861         } else if(ret) {
12862                 error("%s is currently mounted, aborting", argv[optind]);
12863                 ret = -EBUSY;
12864                 err |= !!ret;
12865                 goto err_out;
12866         }
12867
12868         /* only allow partial opening under repair mode */
12869         if (repair)
12870                 ctree_flags |= OPEN_CTREE_PARTIAL;
12871
12872         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12873                                   chunk_root_bytenr, ctree_flags);
12874         if (!info) {
12875                 error("cannot open file system");
12876                 ret = -EIO;
12877                 err |= !!ret;
12878                 goto err_out;
12879         }
12880
12881         global_info = info;
12882         root = info->fs_root;
12883         if (clear_space_cache == 1) {
12884                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12885                         error(
12886                 "free space cache v2 detected, use --clear-space-cache v2");
12887                         ret = 1;
12888                         goto close_out;
12889                 }
12890                 printf("Clearing free space cache\n");
12891                 ret = clear_free_space_cache(info);
12892                 if (ret) {
12893                         error("failed to clear free space cache");
12894                         ret = 1;
12895                 } else {
12896                         printf("Free space cache cleared\n");
12897                 }
12898                 goto close_out;
12899         } else if (clear_space_cache == 2) {
12900                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12901                         printf("no free space cache v2 to clear\n");
12902                         ret = 0;
12903                         goto close_out;
12904                 }
12905                 printf("Clear free space cache v2\n");
12906                 ret = btrfs_clear_free_space_tree(info);
12907                 if (ret) {
12908                         error("failed to clear free space cache v2: %d", ret);
12909                         ret = 1;
12910                 } else {
12911                         printf("free space cache v2 cleared\n");
12912                 }
12913                 goto close_out;
12914         }
12915
12916         /*
12917          * repair mode will force us to commit transaction which
12918          * will make us fail to load log tree when mounting.
12919          */
12920         if (repair && btrfs_super_log_root(info->super_copy)) {
12921                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12922                 if (!ret) {
12923                         ret = 1;
12924                         err |= !!ret;
12925                         goto close_out;
12926                 }
12927                 ret = zero_log_tree(root);
12928                 err |= !!ret;
12929                 if (ret) {
12930                         error("failed to zero log tree: %d", ret);
12931                         goto close_out;
12932                 }
12933         }
12934
12935         uuid_unparse(info->super_copy->fsid, uuidbuf);
12936         if (qgroup_report) {
12937                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12938                        uuidbuf);
12939                 ret = qgroup_verify_all(info);
12940                 err |= !!ret;
12941                 if (ret == 0)
12942                         report_qgroups(1);
12943                 goto close_out;
12944         }
12945         if (subvolid) {
12946                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12947                        subvolid, argv[optind], uuidbuf);
12948                 ret = print_extent_state(info, subvolid);
12949                 err |= !!ret;
12950                 goto close_out;
12951         }
12952         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12953
12954         if (!extent_buffer_uptodate(info->tree_root->node) ||
12955             !extent_buffer_uptodate(info->dev_root->node) ||
12956             !extent_buffer_uptodate(info->chunk_root->node)) {
12957                 error("critical roots corrupted, unable to check the filesystem");
12958                 err |= !!ret;
12959                 ret = -EIO;
12960                 goto close_out;
12961         }
12962
12963         if (init_extent_tree || init_csum_tree) {
12964                 struct btrfs_trans_handle *trans;
12965
12966                 trans = btrfs_start_transaction(info->extent_root, 0);
12967                 if (IS_ERR(trans)) {
12968                         error("error starting transaction");
12969                         ret = PTR_ERR(trans);
12970                         err |= !!ret;
12971                         goto close_out;
12972                 }
12973
12974                 if (init_extent_tree) {
12975                         printf("Creating a new extent tree\n");
12976                         ret = reinit_extent_tree(trans, info);
12977                         err |= !!ret;
12978                         if (ret)
12979                                 goto close_out;
12980                 }
12981
12982                 if (init_csum_tree) {
12983                         printf("Reinitialize checksum tree\n");
12984                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12985                         if (ret) {
12986                                 error("checksum tree initialization failed: %d",
12987                                                 ret);
12988                                 ret = -EIO;
12989                                 err |= !!ret;
12990                                 goto close_out;
12991                         }
12992
12993                         ret = fill_csum_tree(trans, info->csum_root,
12994                                              init_extent_tree);
12995                         err |= !!ret;
12996                         if (ret) {
12997                                 error("checksum tree refilling failed: %d", ret);
12998                                 return -EIO;
12999                         }
13000                 }
13001                 /*
13002                  * Ok now we commit and run the normal fsck, which will add
13003                  * extent entries for all of the items it finds.
13004                  */
13005                 ret = btrfs_commit_transaction(trans, info->extent_root);
13006                 err |= !!ret;
13007                 if (ret)
13008                         goto close_out;
13009         }
13010         if (!extent_buffer_uptodate(info->extent_root->node)) {
13011                 error("critical: extent_root, unable to check the filesystem");
13012                 ret = -EIO;
13013                 err |= !!ret;
13014                 goto close_out;
13015         }
13016         if (!extent_buffer_uptodate(info->csum_root->node)) {
13017                 error("critical: csum_root, unable to check the filesystem");
13018                 ret = -EIO;
13019                 err |= !!ret;
13020                 goto close_out;
13021         }
13022
13023         if (!ctx.progress_enabled)
13024                 fprintf(stderr, "checking extents\n");
13025         if (check_mode == CHECK_MODE_LOWMEM)
13026                 ret = check_chunks_and_extents_v2(root);
13027         else
13028                 ret = check_chunks_and_extents(root);
13029         err |= !!ret;
13030         if (ret)
13031                 error(
13032                 "errors found in extent allocation tree or chunk allocation");
13033
13034         ret = repair_root_items(info);
13035         err |= !!ret;
13036         if (ret < 0) {
13037                 error("failed to repair root items: %s", strerror(-ret));
13038                 goto close_out;
13039         }
13040         if (repair) {
13041                 fprintf(stderr, "Fixed %d roots.\n", ret);
13042                 ret = 0;
13043         } else if (ret > 0) {
13044                 fprintf(stderr,
13045                        "Found %d roots with an outdated root item.\n",
13046                        ret);
13047                 fprintf(stderr,
13048                         "Please run a filesystem check with the option --repair to fix them.\n");
13049                 ret = 1;
13050                 err |= !!ret;
13051                 goto close_out;
13052         }
13053
13054         if (!ctx.progress_enabled) {
13055                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13056                         fprintf(stderr, "checking free space tree\n");
13057                 else
13058                         fprintf(stderr, "checking free space cache\n");
13059         }
13060         ret = check_space_cache(root);
13061         err |= !!ret;
13062         if (ret) {
13063                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13064                         error("errors found in free space tree");
13065                 else
13066                         error("errors found in free space cache");
13067                 goto out;
13068         }
13069
13070         /*
13071          * We used to have to have these hole extents in between our real
13072          * extents so if we don't have this flag set we need to make sure there
13073          * are no gaps in the file extents for inodes, otherwise we can just
13074          * ignore it when this happens.
13075          */
13076         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13077         if (!ctx.progress_enabled)
13078                 fprintf(stderr, "checking fs roots\n");
13079         if (check_mode == CHECK_MODE_LOWMEM)
13080                 ret = check_fs_roots_v2(root->fs_info);
13081         else
13082                 ret = check_fs_roots(root, &root_cache);
13083         err |= !!ret;
13084         if (ret) {
13085                 error("errors found in fs roots");
13086                 goto out;
13087         }
13088
13089         fprintf(stderr, "checking csums\n");
13090         ret = check_csums(root);
13091         err |= !!ret;
13092         if (ret) {
13093                 error("errors found in csum tree");
13094                 goto out;
13095         }
13096
13097         fprintf(stderr, "checking root refs\n");
13098         /* For low memory mode, check_fs_roots_v2 handles root refs */
13099         if (check_mode != CHECK_MODE_LOWMEM) {
13100                 ret = check_root_refs(root, &root_cache);
13101                 err |= !!ret;
13102                 if (ret) {
13103                         error("errors found in root refs");
13104                         goto out;
13105                 }
13106         }
13107
13108         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13109                 struct extent_buffer *eb;
13110
13111                 eb = list_first_entry(&root->fs_info->recow_ebs,
13112                                       struct extent_buffer, recow);
13113                 list_del_init(&eb->recow);
13114                 ret = recow_extent_buffer(root, eb);
13115                 err |= !!ret;
13116                 if (ret) {
13117                         error("fails to fix transid errors");
13118                         break;
13119                 }
13120         }
13121
13122         while (!list_empty(&delete_items)) {
13123                 struct bad_item *bad;
13124
13125                 bad = list_first_entry(&delete_items, struct bad_item, list);
13126                 list_del_init(&bad->list);
13127                 if (repair) {
13128                         ret = delete_bad_item(root, bad);
13129                         err |= !!ret;
13130                 }
13131                 free(bad);
13132         }
13133
13134         if (info->quota_enabled) {
13135                 fprintf(stderr, "checking quota groups\n");
13136                 ret = qgroup_verify_all(info);
13137                 err |= !!ret;
13138                 if (ret) {
13139                         error("failed to check quota groups");
13140                         goto out;
13141                 }
13142                 report_qgroups(0);
13143                 ret = repair_qgroups(info, &qgroups_repaired);
13144                 err |= !!ret;
13145                 if (err) {
13146                         error("failed to repair quota groups");
13147                         goto out;
13148                 }
13149                 ret = 0;
13150         }
13151
13152         if (!list_empty(&root->fs_info->recow_ebs)) {
13153                 error("transid errors in file system");
13154                 ret = 1;
13155                 err |= !!ret;
13156         }
13157 out:
13158         if (found_old_backref) { /*
13159                  * there was a disk format change when mixed
13160                  * backref was in testing tree. The old format
13161                  * existed about one week.
13162                  */
13163                 printf("\n * Found old mixed backref format. "
13164                        "The old format is not supported! *"
13165                        "\n * Please mount the FS in readonly mode, "
13166                        "backup data and re-format the FS. *\n\n");
13167                 err |= 1;
13168         }
13169         printf("found %llu bytes used, ",
13170                (unsigned long long)bytes_used);
13171         if (err)
13172                 printf("error(s) found\n");
13173         else
13174                 printf("no error found\n");
13175         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13176         printf("total tree bytes: %llu\n",
13177                (unsigned long long)total_btree_bytes);
13178         printf("total fs tree bytes: %llu\n",
13179                (unsigned long long)total_fs_tree_bytes);
13180         printf("total extent tree bytes: %llu\n",
13181                (unsigned long long)total_extent_tree_bytes);
13182         printf("btree space waste bytes: %llu\n",
13183                (unsigned long long)btree_space_waste);
13184         printf("file data blocks allocated: %llu\n referenced %llu\n",
13185                 (unsigned long long)data_bytes_allocated,
13186                 (unsigned long long)data_bytes_referenced);
13187
13188         free_qgroup_counts();
13189         free_root_recs_tree(&root_cache);
13190 close_out:
13191         close_ctree(root);
13192 err_out:
13193         if (ctx.progress_enabled)
13194                 task_deinit(ctx.info);
13195
13196         return err;
13197 }