btrfs-progs: check: lowmem: Fix false alert on inline compressed extent
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase reported by the progress task.  The values index the string table
 * in print_status_check(), so TASK_NOTHING must stay the last element.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	TASK_NOTHING = 3,	/* sentinel: nothing to report */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	CHECK_MODE_UNKNOWN = 2,		/* unrecognized mode string */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits for fs tree items; bit 0 is not assigned. */
#define ROOT_DIR_ERROR		(1 << 1)	/* Bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* Bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM without reference */
#define NO_INODE_ITEM		(1 << 14)	/* No inode_item */
#define LAST_ITEM		(1 << 15)	/* Tree traversal is complete */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not yet determined" value for
 * extent_record::flag_block_full_backref.
 */
enum {
	FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Error bits stored in the 'errors' member of a backref record. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Error bits for inode_record::errors, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet; they are only used internally
 * for error classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * bit 4 with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable once OR-ed into a result; it now uses the first free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Terminate the progress line by emitting a newline on stdout.
 *
 * @p: unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191
2192                 if (btrfs_is_leaf(next))
2193                         status = btrfs_check_leaf(root, NULL, next);
2194                 else
2195                         status = btrfs_check_node(root, NULL, next);
2196                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197                         free_extent_buffer(next);
2198                         err = -EIO;
2199                         goto out;
2200                 }
2201
2202                 *level = *level - 1;
2203                 free_extent_buffer(path->nodes[*level]);
2204                 path->nodes[*level] = next;
2205                 path->slots[*level] = 0;
2206         }
2207 out:
2208         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209         return err;
2210 }
2211
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213                             unsigned int ext_ref);
2214
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216                              int *level, struct node_refs *nrefs, int ext_ref)
2217 {
2218         enum btrfs_tree_block_status status;
2219         u64 bytenr;
2220         u64 ptr_gen;
2221         struct extent_buffer *next;
2222         struct extent_buffer *cur;
2223         u32 blocksize;
2224         int ret;
2225
2226         WARN_ON(*level < 0);
2227         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2228
2229         ret = update_nodes_refs(root, path->nodes[*level]->start,
2230                                 nrefs, *level);
2231         if (ret < 0)
2232                 return ret;
2233
2234         while (*level >= 0) {
2235                 WARN_ON(*level < 0);
2236                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237                 cur = path->nodes[*level];
2238
2239                 if (btrfs_header_level(cur) != *level)
2240                         WARN_ON(1);
2241
2242                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243                         break;
2244                 /* Don't forgot to check leaf/node validation */
2245                 if (*level == 0) {
2246                         ret = btrfs_check_leaf(root, NULL, cur);
2247                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248                                 ret = -EIO;
2249                                 break;
2250                         }
2251                         ret = process_one_leaf_v2(root, path, nrefs,
2252                                                   level, ext_ref);
2253                         break;
2254                 } else {
2255                         ret = btrfs_check_node(root, NULL, cur);
2256                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257                                 ret = -EIO;
2258                                 break;
2259                         }
2260                 }
2261                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263                 blocksize = root->nodesize;
2264
2265                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266                 if (ret)
2267                         break;
2268                 if (!nrefs->need_check[*level - 1]) {
2269                         path->slots[*level]++;
2270                         continue;
2271                 }
2272
2273                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275                         free_extent_buffer(next);
2276                         reada_walk_down(root, cur, path->slots[*level]);
2277                         next = read_tree_block(root, bytenr, blocksize,
2278                                                ptr_gen);
2279                         if (!extent_buffer_uptodate(next)) {
2280                                 struct btrfs_key node_key;
2281
2282                                 btrfs_node_key_to_cpu(path->nodes[*level],
2283                                                       &node_key,
2284                                                       path->slots[*level]);
2285                                 btrfs_add_corrupt_extent_record(root->fs_info,
2286                                                 &node_key,
2287                                                 path->nodes[*level]->start,
2288                                                 root->nodesize, *level);
2289                                 ret = -EIO;
2290                                 break;
2291                         }
2292                 }
2293
2294                 ret = check_child_node(cur, path->slots[*level], next);
2295                 if (ret < 0) 
2296                         break;
2297
2298                 if (btrfs_is_leaf(next))
2299                         status = btrfs_check_leaf(root, NULL, next);
2300                 else
2301                         status = btrfs_check_node(root, NULL, next);
2302                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303                         free_extent_buffer(next);
2304                         ret = -EIO;
2305                         break;
2306                 }
2307
2308                 *level = *level - 1;
2309                 free_extent_buffer(path->nodes[*level]);
2310                 path->nodes[*level] = next;
2311                 path->slots[*level] = 0;
2312         }
2313         return ret;
2314 }
2315
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317                         struct walk_control *wc, int *level)
2318 {
2319         int i;
2320         struct extent_buffer *leaf;
2321
2322         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323                 leaf = path->nodes[i];
2324                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325                         path->slots[i]++;
2326                         *level = i;
2327                         return 0;
2328                 } else {
2329                         free_extent_buffer(path->nodes[*level]);
2330                         path->nodes[*level] = NULL;
2331                         BUG_ON(*level > wc->active_node);
2332                         if (*level == wc->active_node)
2333                                 leave_shared_node(root, wc, *level);
2334                         *level = i + 1;
2335                 }
2336         }
2337         return 1;
2338 }
2339
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341                            int *level)
2342 {
2343         int i;
2344         struct extent_buffer *leaf;
2345
2346         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347                 leaf = path->nodes[i];
2348                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349                         path->slots[i]++;
2350                         *level = i;
2351                         return 0;
2352                 } else {
2353                         free_extent_buffer(path->nodes[*level]);
2354                         path->nodes[*level] = NULL;
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int check_root_dir(struct inode_record *rec)
2362 {
2363         struct inode_backref *backref;
2364         int ret = -1;
2365
2366         if (!rec->found_inode_item || rec->errors)
2367                 goto out;
2368         if (rec->nlink != 1 || rec->found_link != 0)
2369                 goto out;
2370         if (list_empty(&rec->backrefs))
2371                 goto out;
2372         backref = to_inode_backref(rec->backrefs.next);
2373         if (!backref->found_inode_ref)
2374                 goto out;
2375         if (backref->index != 0 || backref->namelen != 2 ||
2376             memcmp(backref->name, "..", 2))
2377                 goto out;
2378         if (backref->found_dir_index || backref->found_dir_item)
2379                 goto out;
2380         ret = 0;
2381 out:
2382         return ret;
2383 }
2384
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386                               struct btrfs_root *root, struct btrfs_path *path,
2387                               struct inode_record *rec)
2388 {
2389         struct btrfs_inode_item *ei;
2390         struct btrfs_key key;
2391         int ret;
2392
2393         key.objectid = rec->ino;
2394         key.type = BTRFS_INODE_ITEM_KEY;
2395         key.offset = (u64)-1;
2396
2397         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398         if (ret < 0)
2399                 goto out;
2400         if (ret) {
2401                 if (!path->slots[0]) {
2402                         ret = -ENOENT;
2403                         goto out;
2404                 }
2405                 path->slots[0]--;
2406                 ret = 0;
2407         }
2408         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409         if (key.objectid != rec->ino) {
2410                 ret = -ENOENT;
2411                 goto out;
2412         }
2413
2414         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415                             struct btrfs_inode_item);
2416         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417         btrfs_mark_buffer_dirty(path->nodes[0]);
2418         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420                root->root_key.objectid);
2421 out:
2422         btrfs_release_path(path);
2423         return ret;
2424 }
2425
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427                                     struct btrfs_root *root,
2428                                     struct btrfs_path *path,
2429                                     struct inode_record *rec)
2430 {
2431         int ret;
2432
2433         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434         btrfs_release_path(path);
2435         if (!ret)
2436                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437         return ret;
2438 }
2439
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441                                struct btrfs_root *root,
2442                                struct btrfs_path *path,
2443                                struct inode_record *rec)
2444 {
2445         struct btrfs_inode_item *ei;
2446         struct btrfs_key key;
2447         int ret = 0;
2448
2449         key.objectid = rec->ino;
2450         key.type = BTRFS_INODE_ITEM_KEY;
2451         key.offset = 0;
2452
2453         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454         if (ret) {
2455                 if (ret > 0)
2456                         ret = -ENOENT;
2457                 goto out;
2458         }
2459
2460         /* Since ret == 0, no need to check anything */
2461         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462                             struct btrfs_inode_item);
2463         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464         btrfs_mark_buffer_dirty(path->nodes[0]);
2465         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466         printf("reset nbytes for ino %llu root %llu\n",
2467                rec->ino, root->root_key.objectid);
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474                                  struct cache_tree *inode_cache,
2475                                  struct inode_record *rec,
2476                                  struct inode_backref *backref)
2477 {
2478         struct btrfs_path path;
2479         struct btrfs_trans_handle *trans;
2480         struct btrfs_dir_item *dir_item;
2481         struct extent_buffer *leaf;
2482         struct btrfs_key key;
2483         struct btrfs_disk_key disk_key;
2484         struct inode_record *dir_rec;
2485         unsigned long name_ptr;
2486         u32 data_size = sizeof(*dir_item) + backref->namelen;
2487         int ret;
2488
2489         trans = btrfs_start_transaction(root, 1);
2490         if (IS_ERR(trans))
2491                 return PTR_ERR(trans);
2492
2493         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494                 (unsigned long long)rec->ino);
2495
2496         btrfs_init_path(&path);
2497         key.objectid = backref->dir;
2498         key.type = BTRFS_DIR_INDEX_KEY;
2499         key.offset = backref->index;
2500         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501         BUG_ON(ret);
2502
2503         leaf = path.nodes[0];
2504         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2505
2506         disk_key.objectid = cpu_to_le64(rec->ino);
2507         disk_key.type = BTRFS_INODE_ITEM_KEY;
2508         disk_key.offset = 0;
2509
2510         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512         btrfs_set_dir_data_len(leaf, dir_item, 0);
2513         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514         name_ptr = (unsigned long)(dir_item + 1);
2515         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516         btrfs_mark_buffer_dirty(leaf);
2517         btrfs_release_path(&path);
2518         btrfs_commit_transaction(trans, root);
2519
2520         backref->found_dir_index = 1;
2521         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522         BUG_ON(IS_ERR(dir_rec));
2523         if (!dir_rec)
2524                 return 0;
2525         dir_rec->found_size += backref->namelen;
2526         if (dir_rec->found_size == dir_rec->isize &&
2527             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529         if (dir_rec->found_size != dir_rec->isize)
2530                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2531
2532         return 0;
2533 }
2534
2535 static int delete_dir_index(struct btrfs_root *root,
2536                             struct inode_backref *backref)
2537 {
2538         struct btrfs_trans_handle *trans;
2539         struct btrfs_dir_item *di;
2540         struct btrfs_path path;
2541         int ret = 0;
2542
2543         trans = btrfs_start_transaction(root, 1);
2544         if (IS_ERR(trans))
2545                 return PTR_ERR(trans);
2546
2547         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548                 (unsigned long long)backref->dir,
2549                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550                 (unsigned long long)root->objectid);
2551
2552         btrfs_init_path(&path);
2553         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554                                     backref->name, backref->namelen,
2555                                     backref->index, -1);
2556         if (IS_ERR(di)) {
2557                 ret = PTR_ERR(di);
2558                 btrfs_release_path(&path);
2559                 btrfs_commit_transaction(trans, root);
2560                 if (ret == -ENOENT)
2561                         return 0;
2562                 return ret;
2563         }
2564
2565         if (!di)
2566                 ret = btrfs_del_item(trans, root, &path);
2567         else
2568                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569         BUG_ON(ret);
2570         btrfs_release_path(&path);
2571         btrfs_commit_transaction(trans, root);
2572         return ret;
2573 }
2574
2575 static int create_inode_item(struct btrfs_root *root,
2576                              struct inode_record *rec,
2577                              int root_dir)
2578 {
2579         struct btrfs_trans_handle *trans;
2580         struct btrfs_inode_item inode_item;
2581         time_t now = time(NULL);
2582         int ret;
2583
2584         trans = btrfs_start_transaction(root, 1);
2585         if (IS_ERR(trans)) {
2586                 ret = PTR_ERR(trans);
2587                 return ret;
2588         }
2589
2590         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591                 "be incomplete, please check permissions and content after "
2592                 "the fsck completes.\n", (unsigned long long)root->objectid,
2593                 (unsigned long long)rec->ino);
2594
2595         memset(&inode_item, 0, sizeof(inode_item));
2596         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597         if (root_dir)
2598                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599         else
2600                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602         if (rec->found_dir_item) {
2603                 if (rec->found_file_extent)
2604                         fprintf(stderr, "root %llu inode %llu has both a dir "
2605                                 "item and extents, unsure if it is a dir or a "
2606                                 "regular file so setting it as a directory\n",
2607                                 (unsigned long long)root->objectid,
2608                                 (unsigned long long)rec->ino);
2609                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611         } else if (!rec->found_dir_item) {
2612                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2614         }
2615         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2623
2624         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625         BUG_ON(ret);
2626         btrfs_commit_transaction(trans, root);
2627         return 0;
2628 }
2629
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631                                  struct inode_record *rec,
2632                                  struct cache_tree *inode_cache,
2633                                  int delete)
2634 {
2635         struct inode_backref *tmp, *backref;
2636         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637         int ret = 0;
2638         int repaired = 0;
2639
2640         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641                 if (!delete && rec->ino == root_dirid) {
2642                         if (!rec->found_inode_item) {
2643                                 ret = create_inode_item(root, rec, 1);
2644                                 if (ret)
2645                                         break;
2646                                 repaired++;
2647                         }
2648                 }
2649
2650                 /* Index 0 for root dir's are special, don't mess with it */
2651                 if (rec->ino == root_dirid && backref->index == 0)
2652                         continue;
2653
2654                 if (delete &&
2655                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2656                      (backref->found_dir_index && backref->found_inode_ref &&
2657                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658                         ret = delete_dir_index(root, backref);
2659                         if (ret)
2660                                 break;
2661                         repaired++;
2662                         list_del(&backref->list);
2663                         free(backref);
2664                 }
2665
2666                 if (!delete && !backref->found_dir_index &&
2667                     backref->found_dir_item && backref->found_inode_ref) {
2668                         ret = add_missing_dir_index(root, inode_cache, rec,
2669                                                     backref);
2670                         if (ret)
2671                                 break;
2672                         repaired++;
2673                         if (backref->found_dir_item &&
2674                             backref->found_dir_index &&
2675                             backref->found_dir_index) {
2676                                 if (!backref->errors &&
2677                                     backref->found_inode_ref) {
2678                                         list_del(&backref->list);
2679                                         free(backref);
2680                                 }
2681                         }
2682                 }
2683
2684                 if (!delete && (!backref->found_dir_index &&
2685                                 !backref->found_dir_item &&
2686                                 backref->found_inode_ref)) {
2687                         struct btrfs_trans_handle *trans;
2688                         struct btrfs_key location;
2689
2690                         ret = check_dir_conflict(root, backref->name,
2691                                                  backref->namelen,
2692                                                  backref->dir,
2693                                                  backref->index);
2694                         if (ret) {
2695                                 /*
2696                                  * let nlink fixing routine to handle it,
2697                                  * which can do it better.
2698                                  */
2699                                 ret = 0;
2700                                 break;
2701                         }
2702                         location.objectid = rec->ino;
2703                         location.type = BTRFS_INODE_ITEM_KEY;
2704                         location.offset = 0;
2705
2706                         trans = btrfs_start_transaction(root, 1);
2707                         if (IS_ERR(trans)) {
2708                                 ret = PTR_ERR(trans);
2709                                 break;
2710                         }
2711                         fprintf(stderr, "adding missing dir index/item pair "
2712                                 "for inode %llu\n",
2713                                 (unsigned long long)rec->ino);
2714                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2715                                                     backref->namelen,
2716                                                     backref->dir, &location,
2717                                                     imode_to_type(rec->imode),
2718                                                     backref->index);
2719                         BUG_ON(ret);
2720                         btrfs_commit_transaction(trans, root);
2721                         repaired++;
2722                 }
2723
2724                 if (!delete && (backref->found_inode_ref &&
2725                                 backref->found_dir_index &&
2726                                 backref->found_dir_item &&
2727                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728                                 !rec->found_inode_item)) {
2729                         ret = create_inode_item(root, rec, 0);
2730                         if (ret)
2731                                 break;
2732                         repaired++;
2733                 }
2734
2735         }
2736         return ret ? ret : repaired;
2737 }
2738
2739 /*
2740  * To determine the file type for nlink/inode_item repair
2741  *
2742  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743  * Return -ENOENT if file type is not found.
2744  */
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2746 {
2747         struct inode_backref *backref;
2748
2749         /* For inode item recovered case */
2750         if (rec->found_inode_item) {
2751                 *type = imode_to_type(rec->imode);
2752                 return 0;
2753         }
2754
2755         list_for_each_entry(backref, &rec->backrefs, list) {
2756                 if (backref->found_dir_index || backref->found_dir_item) {
2757                         *type = backref->filetype;
2758                         return 0;
2759                 }
2760         }
2761         return -ENOENT;
2762 }
2763
2764 /*
2765  * To determine the file name for nlink repair
2766  *
2767  * Return 0 if file name is found, set name and namelen.
2768  * Return -ENOENT if file name is not found.
2769  */
2770 static int find_file_name(struct inode_record *rec,
2771                           char *name, int *namelen)
2772 {
2773         struct inode_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->found_dir_index || backref->found_dir_item ||
2777                     backref->found_inode_ref) {
2778                         memcpy(name, backref->name, backref->namelen);
2779                         *namelen = backref->namelen;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788                        struct btrfs_root *root,
2789                        struct btrfs_path *path,
2790                        struct inode_record *rec)
2791 {
2792         struct inode_backref *backref;
2793         struct inode_backref *tmp;
2794         struct btrfs_key key;
2795         struct btrfs_inode_item *inode_item;
2796         int ret = 0;
2797
2798         /* We don't believe this either, reset it and iterate backref */
2799         rec->found_link = 0;
2800
2801         /* Remove all backref including the valid ones */
2802         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804                                    backref->index, backref->name,
2805                                    backref->namelen, 0);
2806                 if (ret < 0)
2807                         goto out;
2808
2809                 /* remove invalid backref, so it won't be added back */
2810                 if (!(backref->found_dir_index &&
2811                       backref->found_dir_item &&
2812                       backref->found_inode_ref)) {
2813                         list_del(&backref->list);
2814                         free(backref);
2815                 } else {
2816                         rec->found_link++;
2817                 }
2818         }
2819
2820         /* Set nlink to 0 */
2821         key.objectid = rec->ino;
2822         key.type = BTRFS_INODE_ITEM_KEY;
2823         key.offset = 0;
2824         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825         if (ret < 0)
2826                 goto out;
2827         if (ret > 0) {
2828                 ret = -ENOENT;
2829                 goto out;
2830         }
2831         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832                                     struct btrfs_inode_item);
2833         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834         btrfs_mark_buffer_dirty(path->nodes[0]);
2835         btrfs_release_path(path);
2836
2837         /*
2838          * Add back valid inode_ref/dir_item/dir_index,
2839          * add_link() will handle the nlink inc, so new nlink must be correct
2840          */
2841         list_for_each_entry(backref, &rec->backrefs, list) {
2842                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843                                      backref->name, backref->namelen,
2844                                      backref->filetype, &backref->index, 1);
2845                 if (ret < 0)
2846                         goto out;
2847         }
2848 out:
2849         btrfs_release_path(path);
2850         return ret;
2851 }
2852
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854                                 struct btrfs_root *root,
2855                                 struct btrfs_path *path,
2856                                 u64 *highest_ino)
2857 {
2858         struct btrfs_key key, found_key;
2859         int ret;
2860
2861         btrfs_init_path(path);
2862         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863         key.offset = -1;
2864         key.type = BTRFS_INODE_ITEM_KEY;
2865         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866         if (ret == 1) {
2867                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868                                 path->slots[0] - 1);
2869                 *highest_ino = found_key.objectid;
2870                 ret = 0;
2871         }
2872         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873                 ret = -EOVERFLOW;
2874         btrfs_release_path(path);
2875         return ret;
2876 }
2877
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879                                struct btrfs_root *root,
2880                                struct btrfs_path *path,
2881                                struct inode_record *rec)
2882 {
2883         char *dir_name = "lost+found";
2884         char namebuf[BTRFS_NAME_LEN] = {0};
2885         u64 lost_found_ino;
2886         u32 mode = 0700;
2887         u8 type = 0;
2888         int namelen = 0;
2889         int name_recovered = 0;
2890         int type_recovered = 0;
2891         int ret = 0;
2892
2893         /*
2894          * Get file name and type first before these invalid inode ref
2895          * are deleted by remove_all_invalid_backref()
2896          */
2897         name_recovered = !find_file_name(rec, namebuf, &namelen);
2898         type_recovered = !find_file_type(rec, &type);
2899
2900         if (!name_recovered) {
2901                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902                        rec->ino, rec->ino);
2903                 namelen = count_digits(rec->ino);
2904                 sprintf(namebuf, "%llu", rec->ino);
2905                 name_recovered = 1;
2906         }
2907         if (!type_recovered) {
2908                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909                        rec->ino);
2910                 type = BTRFS_FT_REG_FILE;
2911                 type_recovered = 1;
2912         }
2913
2914         ret = reset_nlink(trans, root, path, rec);
2915         if (ret < 0) {
2916                 fprintf(stderr,
2917                         "Failed to reset nlink for inode %llu: %s\n",
2918                         rec->ino, strerror(-ret));
2919                 goto out;
2920         }
2921
2922         if (rec->found_link == 0) {
2923                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924                 if (ret < 0)
2925                         goto out;
2926                 lost_found_ino++;
2927                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929                                   mode);
2930                 if (ret < 0) {
2931                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932                                 dir_name, strerror(-ret));
2933                         goto out;
2934                 }
2935                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936                                      namebuf, namelen, type, NULL, 1);
2937                 /*
2938                  * Add ".INO" suffix several times to handle case where
2939                  * "FILENAME.INO" is already taken by another file.
2940                  */
2941                 while (ret == -EEXIST) {
2942                         /*
2943                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2944                          */
2945                         if (namelen + count_digits(rec->ino) + 1 >
2946                             BTRFS_NAME_LEN) {
2947                                 ret = -EFBIG;
2948                                 goto out;
2949                         }
2950                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951                                  ".%llu", rec->ino);
2952                         namelen += count_digits(rec->ino) + 1;
2953                         ret = btrfs_add_link(trans, root, rec->ino,
2954                                              lost_found_ino, namebuf,
2955                                              namelen, type, NULL, 1);
2956                 }
2957                 if (ret < 0) {
2958                         fprintf(stderr,
2959                                 "Failed to link the inode %llu to %s dir: %s\n",
2960                                 rec->ino, dir_name, strerror(-ret));
2961                         goto out;
2962                 }
2963                 /*
2964                  * Just increase the found_link, don't actually add the
2965                  * backref. This will make things easier and this inode
2966                  * record will be freed after the repair is done.
2967                  * So fsck will not report problem about this inode.
2968                  */
2969                 rec->found_link++;
2970                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971                        namelen, namebuf, dir_name);
2972         }
2973         printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2975         /*
2976          * Clear the flag anyway, or we will loop forever for the same inode
2977          * as it will not be removed from the bad inode list and the dead loop
2978          * happens.
2979          */
2980         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981         btrfs_release_path(path);
2982         return ret;
2983 }
2984
2985 /*
2986  * Check if there is any normal(reg or prealloc) file extent for given
2987  * ino.
2988  * This is used to determine the file type when neither its dir_index/item or
2989  * inode_item exists.
2990  *
2991  * This will *NOT* report error, if any error happens, just consider it does
2992  * not have any normal file extent.
2993  */
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2995 {
2996         struct btrfs_path path;
2997         struct btrfs_key key;
2998         struct btrfs_key found_key;
2999         struct btrfs_file_extent_item *fi;
3000         u8 type;
3001         int ret = 0;
3002
3003         btrfs_init_path(&path);
3004         key.objectid = ino;
3005         key.type = BTRFS_EXTENT_DATA_KEY;
3006         key.offset = 0;
3007
3008         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009         if (ret < 0) {
3010                 ret = 0;
3011                 goto out;
3012         }
3013         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014                 ret = btrfs_next_leaf(root, &path);
3015                 if (ret) {
3016                         ret = 0;
3017                         goto out;
3018                 }
3019         }
3020         while (1) {
3021                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022                                       path.slots[0]);
3023                 if (found_key.objectid != ino ||
3024                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3025                         break;
3026                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027                                     struct btrfs_file_extent_item);
3028                 type = btrfs_file_extent_type(path.nodes[0], fi);
3029                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030                         ret = 1;
3031                         goto out;
3032                 }
3033         }
3034 out:
3035         btrfs_release_path(&path);
3036         return ret;
3037 }
3038
3039 static u32 btrfs_type_to_imode(u8 type)
3040 {
3041         static u32 imode_by_btrfs_type[] = {
3042                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3043                 [BTRFS_FT_DIR]          = S_IFDIR,
3044                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3045                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3046                 [BTRFS_FT_FIFO]         = S_IFIFO,
3047                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3048                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3049         };
3050
3051         return imode_by_btrfs_type[(type)];
3052 }
3053
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055                                 struct btrfs_root *root,
3056                                 struct btrfs_path *path,
3057                                 struct inode_record *rec)
3058 {
3059         u8 filetype;
3060         u32 mode = 0700;
3061         int type_recovered = 0;
3062         int ret = 0;
3063
3064         printf("Trying to rebuild inode:%llu\n", rec->ino);
3065
3066         type_recovered = !find_file_type(rec, &filetype);
3067
3068         /*
3069          * Try to determine inode type if type not found.
3070          *
3071          * For found regular file extent, it must be FILE.
3072          * For found dir_item/index, it must be DIR.
3073          *
3074          * For undetermined one, use FILE as fallback.
3075          *
3076          * TODO:
3077          * 1. If found backref(inode_index/item is already handled) to it,
3078          *    it must be DIR.
3079          *    Need new inode-inode ref structure to allow search for that.
3080          */
3081         if (!type_recovered) {
3082                 if (rec->found_file_extent &&
3083                     find_normal_file_extent(root, rec->ino)) {
3084                         type_recovered = 1;
3085                         filetype = BTRFS_FT_REG_FILE;
3086                 } else if (rec->found_dir_item) {
3087                         type_recovered = 1;
3088                         filetype = BTRFS_FT_DIR;
3089                 } else if (!list_empty(&rec->orphan_extents)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else{
3093                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094                                rec->ino);
3095                         type_recovered = 1;
3096                         filetype = BTRFS_FT_REG_FILE;
3097                 }
3098         }
3099
3100         ret = btrfs_new_inode(trans, root, rec->ino,
3101                               mode | btrfs_type_to_imode(filetype));
3102         if (ret < 0)
3103                 goto out;
3104
3105         /*
3106          * Here inode rebuild is done, we only rebuild the inode item,
3107          * don't repair the nlink(like move to lost+found).
3108          * That is the job of nlink repair.
3109          *
3110          * We just fill the record and return
3111          */
3112         rec->found_dir_item = 1;
3113         rec->imode = mode | btrfs_type_to_imode(filetype);
3114         rec->nlink = 0;
3115         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116         /* Ensure the inode_nlinks repair function will be called */
3117         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119         return ret;
3120 }
3121
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123                                       struct btrfs_root *root,
3124                                       struct btrfs_path *path,
3125                                       struct inode_record *rec)
3126 {
3127         struct orphan_data_extent *orphan;
3128         struct orphan_data_extent *tmp;
3129         int ret = 0;
3130
3131         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3132                 /*
3133                  * Check for conflicting file extents
3134                  *
3135                  * Here we don't know whether the extents is compressed or not,
3136                  * so we can only assume it not compressed nor data offset,
3137                  * and use its disk_len as extent length.
3138                  */
3139                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140                                        orphan->offset, orphan->disk_len, 0);
3141                 btrfs_release_path(path);
3142                 if (ret < 0)
3143                         goto out;
3144                 if (!ret) {
3145                         fprintf(stderr,
3146                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147                                 orphan->disk_bytenr, orphan->disk_len);
3148                         ret = btrfs_free_extent(trans,
3149                                         root->fs_info->extent_root,
3150                                         orphan->disk_bytenr, orphan->disk_len,
3151                                         0, root->objectid, orphan->objectid,
3152                                         orphan->offset);
3153                         if (ret < 0)
3154                                 goto out;
3155                 }
3156                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157                                 orphan->offset, orphan->disk_bytenr,
3158                                 orphan->disk_len, orphan->disk_len);
3159                 if (ret < 0)
3160                         goto out;
3161
3162                 /* Update file size info */
3163                 rec->found_size += orphan->disk_len;
3164                 if (rec->found_size == rec->nbytes)
3165                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3166
3167                 /* Update the file extent hole info too */
3168                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169                                            orphan->disk_len);
3170                 if (ret < 0)
3171                         goto out;
3172                 if (RB_EMPTY_ROOT(&rec->holes))
3173                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3174
3175                 list_del(&orphan->list);
3176                 free(orphan);
3177         }
3178         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180         return ret;
3181 }
3182
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184                                         struct btrfs_root *root,
3185                                         struct btrfs_path *path,
3186                                         struct inode_record *rec)
3187 {
3188         struct rb_node *node;
3189         struct file_extent_hole *hole;
3190         int found = 0;
3191         int ret = 0;
3192
3193         node = rb_first(&rec->holes);
3194
3195         while (node) {
3196                 found = 1;
3197                 hole = rb_entry(node, struct file_extent_hole, node);
3198                 ret = btrfs_punch_hole(trans, root, rec->ino,
3199                                        hole->start, hole->len);
3200                 if (ret < 0)
3201                         goto out;
3202                 ret = del_file_extent_hole(&rec->holes, hole->start,
3203                                            hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 if (RB_EMPTY_ROOT(&rec->holes))
3207                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208                 node = rb_first(&rec->holes);
3209         }
3210         /* special case for a file losing all its file extent */
3211         if (!found) {
3212                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213                                        round_up(rec->isize, root->sectorsize));
3214                 if (ret < 0)
3215                         goto out;
3216         }
3217         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218                rec->ino, root->objectid);
3219 out:
3220         return ret;
3221 }
3222
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3224 {
3225         struct btrfs_trans_handle *trans;
3226         struct btrfs_path path;
3227         int ret = 0;
3228
3229         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230                              I_ERR_NO_ORPHAN_ITEM |
3231                              I_ERR_LINK_COUNT_WRONG |
3232                              I_ERR_NO_INODE_ITEM |
3233                              I_ERR_FILE_EXTENT_ORPHAN |
3234                              I_ERR_FILE_EXTENT_DISCOUNT|
3235                              I_ERR_FILE_NBYTES_WRONG)))
3236                 return rec->errors;
3237
3238         /*
3239          * For nlink repair, it may create a dir and add link, so
3240          * 2 for parent(256)'s dir_index and dir_item
3241          * 2 for lost+found dir's inode_item and inode_ref
3242          * 1 for the new inode_ref of the file
3243          * 2 for lost+found dir's dir_index and dir_item for the file
3244          */
3245         trans = btrfs_start_transaction(root, 7);
3246         if (IS_ERR(trans))
3247                 return PTR_ERR(trans);
3248
3249         btrfs_init_path(&path);
3250         if (rec->errors & I_ERR_NO_INODE_ITEM)
3251                 ret = repair_inode_no_item(trans, root, &path, rec);
3252         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257                 ret = repair_inode_isize(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261                 ret = repair_inode_nlinks(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263                 ret = repair_inode_nbytes(trans, root, &path, rec);
3264         btrfs_commit_transaction(trans, root);
3265         btrfs_release_path(&path);
3266         return ret;
3267 }
3268
3269 static int check_inode_recs(struct btrfs_root *root,
3270                             struct cache_tree *inode_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int stage = 0;
3277         int ret = 0;
3278         int err = 0;
3279         u64 error = 0;
3280         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3281
3282         if (btrfs_root_refs(&root->root_item) == 0) {
3283                 if (!cache_tree_empty(inode_cache))
3284                         fprintf(stderr, "warning line %d\n", __LINE__);
3285                 return 0;
3286         }
3287
3288         /*
3289          * We need to repair backrefs first because we could change some of the
3290          * errors in the inode recs.
3291          *
3292          * We also need to go through and delete invalid backrefs first and then
3293          * add the correct ones second.  We do this because we may get EEXIST
3294          * when adding back the correct index because we hadn't yet deleted the
3295          * invalid index.
3296          *
3297          * For example, if we were missing a dir index then the directories
3298          * isize would be wrong, so if we fixed the isize to what we thought it
3299          * would be and then fixed the backref we'd still have a invalid fs, so
3300          * we need to add back the dir index and then check to see if the isize
3301          * is still wrong.
3302          */
3303         while (stage < 3) {
3304                 stage++;
3305                 if (stage == 3 && !err)
3306                         break;
3307
3308                 cache = search_cache_extent(inode_cache, 0);
3309                 while (repair && cache) {
3310                         node = container_of(cache, struct ptr_node, cache);
3311                         rec = node->data;
3312                         cache = next_cache_extent(cache);
3313
3314                         /* Need to free everything up and rescan */
3315                         if (stage == 3) {
3316                                 remove_cache_extent(inode_cache, &node->cache);
3317                                 free(node);
3318                                 free_inode_rec(rec);
3319                                 continue;
3320                         }
3321
3322                         if (list_empty(&rec->backrefs))
3323                                 continue;
3324
3325                         ret = repair_inode_backrefs(root, rec, inode_cache,
3326                                                     stage == 1);
3327                         if (ret < 0) {
3328                                 err = ret;
3329                                 stage = 2;
3330                                 break;
3331                         } if (ret > 0) {
3332                                 err = -EAGAIN;
3333                         }
3334                 }
3335         }
3336         if (err)
3337                 return err;
3338
3339         rec = get_inode_rec(inode_cache, root_dirid, 0);
3340         BUG_ON(IS_ERR(rec));
3341         if (rec) {
3342                 ret = check_root_dir(rec);
3343                 if (ret) {
3344                         fprintf(stderr, "root %llu root dir %llu error\n",
3345                                 (unsigned long long)root->root_key.objectid,
3346                                 (unsigned long long)root_dirid);
3347                         print_inode_error(root, rec);
3348                         error++;
3349                 }
3350         } else {
3351                 if (repair) {
3352                         struct btrfs_trans_handle *trans;
3353
3354                         trans = btrfs_start_transaction(root, 1);
3355                         if (IS_ERR(trans)) {
3356                                 err = PTR_ERR(trans);
3357                                 return err;
3358                         }
3359
3360                         fprintf(stderr,
3361                                 "root %llu missing its root dir, recreating\n",
3362                                 (unsigned long long)root->objectid);
3363
3364                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3365                         BUG_ON(ret);
3366
3367                         btrfs_commit_transaction(trans, root);
3368                         return -EAGAIN;
3369                 }
3370
3371                 fprintf(stderr, "root %llu root dir %llu not found\n",
3372                         (unsigned long long)root->root_key.objectid,
3373                         (unsigned long long)root_dirid);
3374         }
3375
3376         while (1) {
3377                 cache = search_cache_extent(inode_cache, 0);
3378                 if (!cache)
3379                         break;
3380                 node = container_of(cache, struct ptr_node, cache);
3381                 rec = node->data;
3382                 remove_cache_extent(inode_cache, &node->cache);
3383                 free(node);
3384                 if (rec->ino == root_dirid ||
3385                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386                         free_inode_rec(rec);
3387                         continue;
3388                 }
3389
3390                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391                         ret = check_orphan_item(root, rec->ino);
3392                         if (ret == 0)
3393                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394                         if (can_free_inode_rec(rec)) {
3395                                 free_inode_rec(rec);
3396                                 continue;
3397                         }
3398                 }
3399
3400                 if (!rec->found_inode_item)
3401                         rec->errors |= I_ERR_NO_INODE_ITEM;
3402                 if (rec->found_link != rec->nlink)
3403                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404                 if (repair) {
3405                         ret = try_repair_inode(root, rec);
3406                         if (ret == 0 && can_free_inode_rec(rec)) {
3407                                 free_inode_rec(rec);
3408                                 continue;
3409                         }
3410                         ret = 0;
3411                 }
3412
3413                 if (!(repair && ret == 0))
3414                         error++;
3415                 print_inode_error(root, rec);
3416                 list_for_each_entry(backref, &rec->backrefs, list) {
3417                         if (!backref->found_dir_item)
3418                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419                         if (!backref->found_dir_index)
3420                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421                         if (!backref->found_inode_ref)
3422                                 backref->errors |= REF_ERR_NO_INODE_REF;
3423                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424                                 " namelen %u name %s filetype %d errors %x",
3425                                 (unsigned long long)backref->dir,
3426                                 (unsigned long long)backref->index,
3427                                 backref->namelen, backref->name,
3428                                 backref->filetype, backref->errors);
3429                         print_ref_error(backref->errors);
3430                 }
3431                 free_inode_rec(rec);
3432         }
3433         return (error > 0) ? -1 : 0;
3434 }
3435
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437                                         u64 objectid)
3438 {
3439         struct cache_extent *cache;
3440         struct root_record *rec = NULL;
3441         int ret;
3442
3443         cache = lookup_cache_extent(root_cache, objectid, 1);
3444         if (cache) {
3445                 rec = container_of(cache, struct root_record, cache);
3446         } else {
3447                 rec = calloc(1, sizeof(*rec));
3448                 if (!rec)
3449                         return ERR_PTR(-ENOMEM);
3450                 rec->objectid = objectid;
3451                 INIT_LIST_HEAD(&rec->backrefs);
3452                 rec->cache.start = objectid;
3453                 rec->cache.size = 1;
3454
3455                 ret = insert_cache_extent(root_cache, &rec->cache);
3456                 if (ret)
3457                         return ERR_PTR(-EEXIST);
3458         }
3459         return rec;
3460 }
3461
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463                                              u64 ref_root, u64 dir, u64 index,
3464                                              const char *name, int namelen)
3465 {
3466         struct root_backref *backref;
3467
3468         list_for_each_entry(backref, &rec->backrefs, list) {
3469                 if (backref->ref_root != ref_root || backref->dir != dir ||
3470                     backref->namelen != namelen)
3471                         continue;
3472                 if (memcmp(name, backref->name, namelen))
3473                         continue;
3474                 return backref;
3475         }
3476
3477         backref = calloc(1, sizeof(*backref) + namelen + 1);
3478         if (!backref)
3479                 return NULL;
3480         backref->ref_root = ref_root;
3481         backref->dir = dir;
3482         backref->index = index;
3483         backref->namelen = namelen;
3484         memcpy(backref->name, name, namelen);
3485         backref->name[namelen] = '\0';
3486         list_add_tail(&backref->list, &rec->backrefs);
3487         return backref;
3488 }
3489
3490 static void free_root_record(struct cache_extent *cache)
3491 {
3492         struct root_record *rec;
3493         struct root_backref *backref;
3494
3495         rec = container_of(cache, struct root_record, cache);
3496         while (!list_empty(&rec->backrefs)) {
3497                 backref = to_root_backref(rec->backrefs.next);
3498                 list_del(&backref->list);
3499                 free(backref);
3500         }
3501
3502         free(rec);
3503 }
3504
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3506
3507 static int add_root_backref(struct cache_tree *root_cache,
3508                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3509                             const char *name, int namelen,
3510                             int item_type, int errors)
3511 {
3512         struct root_record *rec;
3513         struct root_backref *backref;
3514
3515         rec = get_root_rec(root_cache, root_id);
3516         BUG_ON(IS_ERR(rec));
3517         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518         BUG_ON(!backref);
3519
3520         backref->errors |= errors;
3521
3522         if (item_type != BTRFS_DIR_ITEM_KEY) {
3523                 if (backref->found_dir_index || backref->found_back_ref ||
3524                     backref->found_forward_ref) {
3525                         if (backref->index != index)
3526                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527                 } else {
3528                         backref->index = index;
3529                 }
3530         }
3531
3532         if (item_type == BTRFS_DIR_ITEM_KEY) {
3533                 if (backref->found_forward_ref)
3534                         rec->found_ref++;
3535                 backref->found_dir_item = 1;
3536         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537                 backref->found_dir_index = 1;
3538         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539                 if (backref->found_forward_ref)
3540                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3541                 else if (backref->found_dir_item)
3542                         rec->found_ref++;
3543                 backref->found_forward_ref = 1;
3544         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545                 if (backref->found_back_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547                 backref->found_back_ref = 1;
3548         } else {
3549                 BUG_ON(1);
3550         }
3551
3552         if (backref->found_forward_ref && backref->found_dir_item)
3553                 backref->reachable = 1;
3554         return 0;
3555 }
3556
3557 static int merge_root_recs(struct btrfs_root *root,
3558                            struct cache_tree *src_cache,
3559                            struct cache_tree *dst_cache)
3560 {
3561         struct cache_extent *cache;
3562         struct ptr_node *node;
3563         struct inode_record *rec;
3564         struct inode_backref *backref;
3565         int ret = 0;
3566
3567         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568                 free_inode_recs_tree(src_cache);
3569                 return 0;
3570         }
3571
3572         while (1) {
3573                 cache = search_cache_extent(src_cache, 0);
3574                 if (!cache)
3575                         break;
3576                 node = container_of(cache, struct ptr_node, cache);
3577                 rec = node->data;
3578                 remove_cache_extent(src_cache, &node->cache);
3579                 free(node);
3580
3581                 ret = is_child_root(root, root->objectid, rec->ino);
3582                 if (ret < 0)
3583                         break;
3584                 else if (ret == 0)
3585                         goto skip;
3586
3587                 list_for_each_entry(backref, &rec->backrefs, list) {
3588                         BUG_ON(backref->found_inode_ref);
3589                         if (backref->found_dir_item)
3590                                 add_root_backref(dst_cache, rec->ino,
3591                                         root->root_key.objectid, backref->dir,
3592                                         backref->index, backref->name,
3593                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3594                                         backref->errors);
3595                         if (backref->found_dir_index)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3600                                         backref->errors);
3601                 }
3602 skip:
3603                 free_inode_rec(rec);
3604         }
3605         if (ret < 0)
3606                 return ret;
3607         return 0;
3608 }
3609
3610 static int check_root_refs(struct btrfs_root *root,
3611                            struct cache_tree *root_cache)
3612 {
3613         struct root_record *rec;
3614         struct root_record *ref_root;
3615         struct root_backref *backref;
3616         struct cache_extent *cache;
3617         int loop = 1;
3618         int ret;
3619         int error;
3620         int errors = 0;
3621
3622         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623         BUG_ON(IS_ERR(rec));
3624         rec->found_ref = 1;
3625
3626         /* fixme: this can not detect circular references */
3627         while (loop) {
3628                 loop = 0;
3629                 cache = search_cache_extent(root_cache, 0);
3630                 while (1) {
3631                         if (!cache)
3632                                 break;
3633                         rec = container_of(cache, struct root_record, cache);
3634                         cache = next_cache_extent(cache);
3635
3636                         if (rec->found_ref == 0)
3637                                 continue;
3638
3639                         list_for_each_entry(backref, &rec->backrefs, list) {
3640                                 if (!backref->reachable)
3641                                         continue;
3642
3643                                 ref_root = get_root_rec(root_cache,
3644                                                         backref->ref_root);
3645                                 BUG_ON(IS_ERR(ref_root));
3646                                 if (ref_root->found_ref > 0)
3647                                         continue;
3648
3649                                 backref->reachable = 0;
3650                                 rec->found_ref--;
3651                                 if (rec->found_ref == 0)
3652                                         loop = 1;
3653                         }
3654                 }
3655         }
3656
3657         cache = search_cache_extent(root_cache, 0);
3658         while (1) {
3659                 if (!cache)
3660                         break;
3661                 rec = container_of(cache, struct root_record, cache);
3662                 cache = next_cache_extent(cache);
3663
3664                 if (rec->found_ref == 0 &&
3665                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667                         ret = check_orphan_item(root->fs_info->tree_root,
3668                                                 rec->objectid);
3669                         if (ret == 0)
3670                                 continue;
3671
3672                         /*
3673                          * If we don't have a root item then we likely just have
3674                          * a dir item in a snapshot for this root but no actual
3675                          * ref key or anything so it's meaningless.
3676                          */
3677                         if (!rec->found_root_item)
3678                                 continue;
3679                         errors++;
3680                         fprintf(stderr, "fs tree %llu not referenced\n",
3681                                 (unsigned long long)rec->objectid);
3682                 }
3683
3684                 error = 0;
3685                 if (rec->found_ref > 0 && !rec->found_root_item)
3686                         error = 1;
3687                 list_for_each_entry(backref, &rec->backrefs, list) {
3688                         if (!backref->found_dir_item)
3689                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690                         if (!backref->found_dir_index)
3691                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692                         if (!backref->found_back_ref)
3693                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694                         if (!backref->found_forward_ref)
3695                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3696                         if (backref->reachable && backref->errors)
3697                                 error = 1;
3698                 }
3699                 if (!error)
3700                         continue;
3701
3702                 errors++;
3703                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704                         (unsigned long long)rec->objectid, rec->found_ref,
3705                          rec->found_root_item ? "" : "not found");
3706
3707                 list_for_each_entry(backref, &rec->backrefs, list) {
3708                         if (!backref->reachable)
3709                                 continue;
3710                         if (!backref->errors && rec->found_root_item)
3711                                 continue;
3712                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713                                 " index %llu namelen %u name %s errors %x\n",
3714                                 (unsigned long long)backref->ref_root,
3715                                 (unsigned long long)backref->dir,
3716                                 (unsigned long long)backref->index,
3717                                 backref->namelen, backref->name,
3718                                 backref->errors);
3719                         print_ref_error(backref->errors);
3720                 }
3721         }
3722         return errors > 0 ? 1 : 0;
3723 }
3724
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726                             struct btrfs_key *key,
3727                             struct cache_tree *root_cache)
3728 {
3729         u64 dirid;
3730         u64 index;
3731         u32 len;
3732         u32 name_len;
3733         struct btrfs_root_ref *ref;
3734         char namebuf[BTRFS_NAME_LEN];
3735         int error;
3736
3737         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3738
3739         dirid = btrfs_root_ref_dirid(eb, ref);
3740         index = btrfs_root_ref_sequence(eb, ref);
3741         name_len = btrfs_root_ref_name_len(eb, ref);
3742
3743         if (name_len <= BTRFS_NAME_LEN) {
3744                 len = name_len;
3745                 error = 0;
3746         } else {
3747                 len = BTRFS_NAME_LEN;
3748                 error = REF_ERR_NAME_TOO_LONG;
3749         }
3750         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3751
3752         if (key->type == BTRFS_ROOT_REF_KEY) {
3753                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754                                  index, namebuf, len, key->type, error);
3755         } else {
3756                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757                                  index, namebuf, len, key->type, error);
3758         }
3759         return 0;
3760 }
3761
3762 static void free_corrupt_block(struct cache_extent *cache)
3763 {
3764         struct btrfs_corrupt_block *corrupt;
3765
3766         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767         free(corrupt);
3768 }
3769
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3771
3772 /*
3773  * Repair the btree of the given root.
3774  *
3775  * The fix is to remove the node key in corrupt_blocks cache_tree.
3776  * and rebalance the tree.
3777  * After the fix, the btree should be writeable.
3778  */
3779 static int repair_btree(struct btrfs_root *root,
3780                         struct cache_tree *corrupt_blocks)
3781 {
3782         struct btrfs_trans_handle *trans;
3783         struct btrfs_path path;
3784         struct btrfs_corrupt_block *corrupt;
3785         struct cache_extent *cache;
3786         struct btrfs_key key;
3787         u64 offset;
3788         int level;
3789         int ret = 0;
3790
3791         if (cache_tree_empty(corrupt_blocks))
3792                 return 0;
3793
3794         trans = btrfs_start_transaction(root, 1);
3795         if (IS_ERR(trans)) {
3796                 ret = PTR_ERR(trans);
3797                 fprintf(stderr, "Error starting transaction: %s\n",
3798                         strerror(-ret));
3799                 return ret;
3800         }
3801         btrfs_init_path(&path);
3802         cache = first_cache_extent(corrupt_blocks);
3803         while (cache) {
3804                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805                                        cache);
3806                 level = corrupt->level;
3807                 path.lowest_level = level;
3808                 key.objectid = corrupt->key.objectid;
3809                 key.type = corrupt->key.type;
3810                 key.offset = corrupt->key.offset;
3811
3812                 /*
3813                  * Here we don't want to do any tree balance, since it may
3814                  * cause a balance with corrupted brother leaf/node,
3815                  * so ins_len set to 0 here.
3816                  * Balance will be done after all corrupt node/leaf is deleted.
3817                  */
3818                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819                 if (ret < 0)
3820                         goto out;
3821                 offset = btrfs_node_blockptr(path.nodes[level],
3822                                              path.slots[level]);
3823
3824                 /* Remove the ptr */
3825                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826                 if (ret < 0)
3827                         goto out;
3828                 /*
3829                  * Remove the corresponding extent
3830                  * return value is not concerned.
3831                  */
3832                 btrfs_release_path(&path);
3833                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834                                         0, root->root_key.objectid,
3835                                         level - 1, 0);
3836                 cache = next_cache_extent(cache);
3837         }
3838
3839         /* Balance the btree using btrfs_search_slot() */
3840         cache = first_cache_extent(corrupt_blocks);
3841         while (cache) {
3842                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843                                        cache);
3844                 memcpy(&key, &corrupt->key, sizeof(key));
3845                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 /* return will always >0 since it won't find the item */
3849                 ret = 0;
3850                 btrfs_release_path(&path);
3851                 cache = next_cache_extent(cache);
3852         }
3853 out:
3854         btrfs_commit_transaction(trans, root);
3855         btrfs_release_path(&path);
3856         return ret;
3857 }
3858
3859 static int check_fs_root(struct btrfs_root *root,
3860                          struct cache_tree *root_cache,
3861                          struct walk_control *wc)
3862 {
3863         int ret = 0;
3864         int err = 0;
3865         int wret;
3866         int level;
3867         struct btrfs_path path;
3868         struct shared_node root_node;
3869         struct root_record *rec;
3870         struct btrfs_root_item *root_item = &root->root_item;
3871         struct cache_tree corrupt_blocks;
3872         struct orphan_data_extent *orphan;
3873         struct orphan_data_extent *tmp;
3874         enum btrfs_tree_block_status status;
3875         struct node_refs nrefs;
3876
3877         /*
3878          * Reuse the corrupt_block cache tree to record corrupted tree block
3879          *
3880          * Unlike the usage in extent tree check, here we do it in a per
3881          * fs/subvol tree base.
3882          */
3883         cache_tree_init(&corrupt_blocks);
3884         root->fs_info->corrupt_blocks = &corrupt_blocks;
3885
3886         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887                 rec = get_root_rec(root_cache, root->root_key.objectid);
3888                 BUG_ON(IS_ERR(rec));
3889                 if (btrfs_root_refs(root_item) > 0)
3890                         rec->found_root_item = 1;
3891         }
3892
3893         btrfs_init_path(&path);
3894         memset(&root_node, 0, sizeof(root_node));
3895         cache_tree_init(&root_node.root_cache);
3896         cache_tree_init(&root_node.inode_cache);
3897         memset(&nrefs, 0, sizeof(nrefs));
3898
3899         /* Move the orphan extent record to corresponding inode_record */
3900         list_for_each_entry_safe(orphan, tmp,
3901                                  &root->orphan_data_extents, list) {
3902                 struct inode_record *inode;
3903
3904                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3905                                       1);
3906                 BUG_ON(IS_ERR(inode));
3907                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908                 list_move(&orphan->list, &inode->orphan_extents);
3909         }
3910
3911         level = btrfs_header_level(root->node);
3912         memset(wc->nodes, 0, sizeof(wc->nodes));
3913         wc->nodes[level] = &root_node;
3914         wc->active_node = level;
3915         wc->root_level = level;
3916
3917         /* We may not have checked the root block, lets do that now */
3918         if (btrfs_is_leaf(root->node))
3919                 status = btrfs_check_leaf(root, NULL, root->node);
3920         else
3921                 status = btrfs_check_node(root, NULL, root->node);
3922         if (status != BTRFS_TREE_BLOCK_CLEAN)
3923                 return -EIO;
3924
3925         if (btrfs_root_refs(root_item) > 0 ||
3926             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927                 path.nodes[level] = root->node;
3928                 extent_buffer_get(root->node);
3929                 path.slots[level] = 0;
3930         } else {
3931                 struct btrfs_key key;
3932                 struct btrfs_disk_key found_key;
3933
3934                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935                 level = root_item->drop_level;
3936                 path.lowest_level = level;
3937                 if (level > btrfs_header_level(root->node) ||
3938                     level >= BTRFS_MAX_LEVEL) {
3939                         error("ignoring invalid drop level: %u", level);
3940                         goto skip_walking;
3941                 }
3942                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943                 if (wret < 0)
3944                         goto skip_walking;
3945                 btrfs_node_key(path.nodes[level], &found_key,
3946                                 path.slots[level]);
3947                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948                                         sizeof(found_key)));
3949         }
3950
3951         while (1) {
3952                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953                 if (wret < 0)
3954                         ret = wret;
3955                 if (wret != 0)
3956                         break;
3957
3958                 wret = walk_up_tree(root, &path, wc, &level);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963         }
3964 skip_walking:
3965         btrfs_release_path(&path);
3966
3967         if (!cache_tree_empty(&corrupt_blocks)) {
3968                 struct cache_extent *cache;
3969                 struct btrfs_corrupt_block *corrupt;
3970
3971                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972                        root->root_key.objectid);
3973                 cache = first_cache_extent(&corrupt_blocks);
3974                 while (cache) {
3975                         corrupt = container_of(cache,
3976                                                struct btrfs_corrupt_block,
3977                                                cache);
3978                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979                                cache->start, corrupt->level,
3980                                corrupt->key.objectid, corrupt->key.type,
3981                                corrupt->key.offset);
3982                         cache = next_cache_extent(cache);
3983                 }
3984                 if (repair) {
3985                         printf("Try to repair the btree for root %llu\n",
3986                                root->root_key.objectid);
3987                         ret = repair_btree(root, &corrupt_blocks);
3988                         if (ret < 0)
3989                                 fprintf(stderr, "Failed to repair btree: %s\n",
3990                                         strerror(-ret));
3991                         if (!ret)
3992                                 printf("Btree for root %llu is fixed\n",
3993                                        root->root_key.objectid);
3994                 }
3995         }
3996
3997         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998         if (err < 0)
3999                 ret = err;
4000
4001         if (root_node.current) {
4002                 root_node.current->checked = 1;
4003                 maybe_free_inode_rec(&root_node.inode_cache,
4004                                 root_node.current);
4005         }
4006
4007         err = check_inode_recs(root, &root_node.inode_cache);
4008         if (!ret)
4009                 ret = err;
4010
4011         free_corrupt_blocks_tree(&corrupt_blocks);
4012         root->fs_info->corrupt_blocks = NULL;
4013         free_orphan_data_extents(&root->orphan_data_extents);
4014         return ret;
4015 }
4016
4017 static int fs_root_objectid(u64 objectid)
4018 {
4019         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021                 return 1;
4022         return is_fstree(objectid);
4023 }
4024
4025 static int check_fs_roots(struct btrfs_root *root,
4026                           struct cache_tree *root_cache)
4027 {
4028         struct btrfs_path path;
4029         struct btrfs_key key;
4030         struct walk_control wc;
4031         struct extent_buffer *leaf, *tree_node;
4032         struct btrfs_root *tmp_root;
4033         struct btrfs_root *tree_root = root->fs_info->tree_root;
4034         int ret;
4035         int err = 0;
4036
4037         if (ctx.progress_enabled) {
4038                 ctx.tp = TASK_FS_ROOTS;
4039                 task_start(ctx.info);
4040         }
4041
4042         /*
4043          * Just in case we made any changes to the extent tree that weren't
4044          * reflected into the free space cache yet.
4045          */
4046         if (repair)
4047                 reset_cached_block_groups(root->fs_info);
4048         memset(&wc, 0, sizeof(wc));
4049         cache_tree_init(&wc.shared);
4050         btrfs_init_path(&path);
4051
4052 again:
4053         key.offset = 0;
4054         key.objectid = 0;
4055         key.type = BTRFS_ROOT_ITEM_KEY;
4056         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057         if (ret < 0) {
4058                 err = 1;
4059                 goto out;
4060         }
4061         tree_node = tree_root->node;
4062         while (1) {
4063                 if (tree_node != tree_root->node) {
4064                         free_root_recs_tree(root_cache);
4065                         btrfs_release_path(&path);
4066                         goto again;
4067                 }
4068                 leaf = path.nodes[0];
4069                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070                         ret = btrfs_next_leaf(tree_root, &path);
4071                         if (ret) {
4072                                 if (ret < 0)
4073                                         err = 1;
4074                                 break;
4075                         }
4076                         leaf = path.nodes[0];
4077                 }
4078                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080                     fs_root_objectid(key.objectid)) {
4081                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082                                 tmp_root = btrfs_read_fs_root_no_cache(
4083                                                 root->fs_info, &key);
4084                         } else {
4085                                 key.offset = (u64)-1;
4086                                 tmp_root = btrfs_read_fs_root(
4087                                                 root->fs_info, &key);
4088                         }
4089                         if (IS_ERR(tmp_root)) {
4090                                 err = 1;
4091                                 goto next;
4092                         }
4093                         ret = check_fs_root(tmp_root, root_cache, &wc);
4094                         if (ret == -EAGAIN) {
4095                                 free_root_recs_tree(root_cache);
4096                                 btrfs_release_path(&path);
4097                                 goto again;
4098                         }
4099                         if (ret)
4100                                 err = 1;
4101                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102                                 btrfs_free_fs_root(tmp_root);
4103                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4105                         process_root_ref(leaf, path.slots[0], &key,
4106                                          root_cache);
4107                 }
4108 next:
4109                 path.slots[0]++;
4110         }
4111 out:
4112         btrfs_release_path(&path);
4113         if (err)
4114                 free_extent_cache_tree(&wc.shared);
4115         if (!cache_tree_empty(&wc.shared))
4116                 fprintf(stderr, "warning line %d\n", __LINE__);
4117
4118         task_stop(ctx.info);
4119
4120         return err;
4121 }
4122
4123 /*
4124  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125  * INODE_REF/INODE_EXTREF match.
4126  *
4127  * @root:       the root of the fs/file tree
4128  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4129  * @key:        the key of the DIR_ITEM/DIR_INDEX
4130  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4131  *              distinguish root_dir between normal dir/file
4132  * @name:       the name in the INODE_REF/INODE_EXTREF
4133  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4134  * @mode:       the st_mode of INODE_ITEM
4135  *
4136  * Return 0 if no error occurred.
4137  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139  * dir/file.
4140  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141  * not match for normal dir/file.
4142  */
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144                          struct btrfs_key *key, u64 index, char *name,
4145                          u32 namelen, u32 mode)
4146 {
4147         struct btrfs_path path;
4148         struct extent_buffer *node;
4149         struct btrfs_dir_item *di;
4150         struct btrfs_key location;
4151         char namebuf[BTRFS_NAME_LEN] = {0};
4152         u32 total;
4153         u32 cur = 0;
4154         u32 len;
4155         u32 name_len;
4156         u32 data_len;
4157         u8 filetype;
4158         int slot;
4159         int ret;
4160
4161         btrfs_init_path(&path);
4162         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163         if (ret < 0) {
4164                 ret = DIR_ITEM_MISSING;
4165                 goto out;
4166         }
4167
4168         /* Process root dir and goto out*/
4169         if (index == 0) {
4170                 if (ret == 0) {
4171                         ret = ROOT_DIR_ERROR;
4172                         error(
4173                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174                                 root->objectid,
4175                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4176                                         "REF" : "EXTREF",
4177                                 ref_key->objectid, ref_key->offset,
4178                                 key->type == BTRFS_DIR_ITEM_KEY ?
4179                                         "DIR_ITEM" : "DIR_INDEX");
4180                 } else {
4181                         ret = 0;
4182                 }
4183
4184                 goto out;
4185         }
4186
4187         /* Process normal file/dir */
4188         if (ret > 0) {
4189                 ret = DIR_ITEM_MISSING;
4190                 error(
4191                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192                         root->objectid,
4193                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194                         ref_key->objectid, ref_key->offset,
4195                         key->type == BTRFS_DIR_ITEM_KEY ?
4196                                 "DIR_ITEM" : "DIR_INDEX",
4197                         key->objectid, key->offset, namelen, name,
4198                         imode_to_type(mode));
4199                 goto out;
4200         }
4201
4202         /* Check whether inode_id/filetype/name match */
4203         node = path.nodes[0];
4204         slot = path.slots[0];
4205         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206         total = btrfs_item_size_nr(node, slot);
4207         while (cur < total) {
4208                 ret = DIR_ITEM_MISMATCH;
4209                 name_len = btrfs_dir_name_len(node, di);
4210                 data_len = btrfs_dir_data_len(node, di);
4211
4212                 btrfs_dir_item_key_to_cpu(node, di, &location);
4213                 if (location.objectid != ref_key->objectid ||
4214                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4215                     location.offset != 0)
4216                         goto next;
4217
4218                 filetype = btrfs_dir_type(node, di);
4219                 if (imode_to_type(mode) != filetype)
4220                         goto next;
4221
4222                 if (name_len <= BTRFS_NAME_LEN) {
4223                         len = name_len;
4224                 } else {
4225                         len = BTRFS_NAME_LEN;
4226                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227                         root->objectid,
4228                         key->type == BTRFS_DIR_ITEM_KEY ?
4229                         "DIR_ITEM" : "DIR_INDEX",
4230                         key->objectid, key->offset, name_len);
4231                 }
4232                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233                 if (len != namelen || strncmp(namebuf, name, len))
4234                         goto next;
4235
4236                 ret = 0;
4237                 goto out;
4238 next:
4239                 len = sizeof(*di) + name_len + data_len;
4240                 di = (struct btrfs_dir_item *)((char *)di + len);
4241                 cur += len;
4242         }
4243         if (ret == DIR_ITEM_MISMATCH)
4244                 error(
4245                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246                         root->objectid,
4247                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248                         ref_key->objectid, ref_key->offset,
4249                         key->type == BTRFS_DIR_ITEM_KEY ?
4250                                 "DIR_ITEM" : "DIR_INDEX",
4251                         key->objectid, key->offset, namelen, name,
4252                         imode_to_type(mode));
4253 out:
4254         btrfs_release_path(&path);
4255         return ret;
4256 }
4257
4258 /*
4259  * Traverse the given INODE_REF and call find_dir_item() to find related
4260  * DIR_ITEM/DIR_INDEX.
4261  *
4262  * @root:       the root of the fs/file tree
4263  * @ref_key:    the key of the INODE_REF
4264  * @refs:       the count of INODE_REF
4265  * @mode:       the st_mode of INODE_ITEM
4266  *
4267  * Return 0 if no error occurred.
4268  */
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270                            struct extent_buffer *node, int slot, u64 *refs,
4271                            int mode)
4272 {
4273         struct btrfs_key key;
4274         struct btrfs_inode_ref *ref;
4275         char namebuf[BTRFS_NAME_LEN] = {0};
4276         u32 total;
4277         u32 cur = 0;
4278         u32 len;
4279         u32 name_len;
4280         u64 index;
4281         int ret, err = 0;
4282
4283         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284         total = btrfs_item_size_nr(node, slot);
4285
4286 next:
4287         /* Update inode ref count */
4288         (*refs)++;
4289
4290         index = btrfs_inode_ref_index(node, ref);
4291         name_len = btrfs_inode_ref_name_len(node, ref);
4292         if (name_len <= BTRFS_NAME_LEN) {
4293                 len = name_len;
4294         } else {
4295                 len = BTRFS_NAME_LEN;
4296                 warning("root %llu INODE_REF[%llu %llu] name too long",
4297                         root->objectid, ref_key->objectid, ref_key->offset);
4298         }
4299
4300         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4301
4302         /* Check root dir ref name */
4303         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305                       root->objectid, ref_key->objectid, ref_key->offset,
4306                       namebuf);
4307                 err |= ROOT_DIR_ERROR;
4308         }
4309
4310         /* Find related DIR_INDEX */
4311         key.objectid = ref_key->offset;
4312         key.type = BTRFS_DIR_INDEX_KEY;
4313         key.offset = index;
4314         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315         err |= ret;
4316
4317         /* Find related dir_item */
4318         key.objectid = ref_key->offset;
4319         key.type = BTRFS_DIR_ITEM_KEY;
4320         key.offset = btrfs_name_hash(namebuf, len);
4321         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322         err |= ret;
4323
4324         len = sizeof(*ref) + name_len;
4325         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326         cur += len;
4327         if (cur < total)
4328                 goto next;
4329
4330         return err;
4331 }
4332
4333 /*
4334  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335  * DIR_ITEM/DIR_INDEX.
4336  *
4337  * @root:       the root of the fs/file tree
4338  * @ref_key:    the key of the INODE_EXTREF
4339  * @refs:       the count of INODE_EXTREF
4340  * @mode:       the st_mode of INODE_ITEM
4341  *
4342  * Return 0 if no error occurred.
4343  */
4344 static int check_inode_extref(struct btrfs_root *root,
4345                               struct btrfs_key *ref_key,
4346                               struct extent_buffer *node, int slot, u64 *refs,
4347                               int mode)
4348 {
4349         struct btrfs_key key;
4350         struct btrfs_inode_extref *extref;
4351         char namebuf[BTRFS_NAME_LEN] = {0};
4352         u32 total;
4353         u32 cur = 0;
4354         u32 len;
4355         u32 name_len;
4356         u64 index;
4357         u64 parent;
4358         int ret;
4359         int err = 0;
4360
4361         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362         total = btrfs_item_size_nr(node, slot);
4363
4364 next:
4365         /* update inode ref count */
4366         (*refs)++;
4367         name_len = btrfs_inode_extref_name_len(node, extref);
4368         index = btrfs_inode_extref_index(node, extref);
4369         parent = btrfs_inode_extref_parent(node, extref);
4370         if (name_len <= BTRFS_NAME_LEN) {
4371                 len = name_len;
4372         } else {
4373                 len = BTRFS_NAME_LEN;
4374                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375                         root->objectid, ref_key->objectid, ref_key->offset);
4376         }
4377         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4378
4379         /* Check root dir ref name */
4380         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382                       root->objectid, ref_key->objectid, ref_key->offset,
4383                       namebuf);
4384                 err |= ROOT_DIR_ERROR;
4385         }
4386
4387         /* find related dir_index */
4388         key.objectid = parent;
4389         key.type = BTRFS_DIR_INDEX_KEY;
4390         key.offset = index;
4391         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392         err |= ret;
4393
4394         /* find related dir_item */
4395         key.objectid = parent;
4396         key.type = BTRFS_DIR_ITEM_KEY;
4397         key.offset = btrfs_name_hash(namebuf, len);
4398         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399         err |= ret;
4400
4401         len = sizeof(*extref) + name_len;
4402         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403         cur += len;
4404
4405         if (cur < total)
4406                 goto next;
4407
4408         return err;
4409 }
4410
4411 /*
4412  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413  * DIR_ITEM/DIR_INDEX match.
4414  *
4415  * @root:       the root of the fs/file tree
4416  * @key:        the key of the INODE_REF/INODE_EXTREF
4417  * @name:       the name in the INODE_REF/INODE_EXTREF
4418  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4419  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420  * to (u64)-1
4421  * @ext_ref:    the EXTENDED_IREF feature
4422  *
4423  * Return 0 if no error occurred.
4424  * Return >0 for error bitmap
4425  */
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427                           char *name, int namelen, u64 index,
4428                           unsigned int ext_ref)
4429 {
4430         struct btrfs_path path;
4431         struct btrfs_inode_ref *ref;
4432         struct btrfs_inode_extref *extref;
4433         struct extent_buffer *node;
4434         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435         u32 total;
4436         u32 cur = 0;
4437         u32 len;
4438         u32 ref_namelen;
4439         u64 ref_index;
4440         u64 parent;
4441         u64 dir_id;
4442         int slot;
4443         int ret;
4444
4445         btrfs_init_path(&path);
4446         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447         if (ret) {
4448                 ret = INODE_REF_MISSING;
4449                 goto extref;
4450         }
4451
4452         node = path.nodes[0];
4453         slot = path.slots[0];
4454
4455         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456         total = btrfs_item_size_nr(node, slot);
4457
4458         /* Iterate all entry of INODE_REF */
4459         while (cur < total) {
4460                 ret = INODE_REF_MISSING;
4461
4462                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463                 ref_index = btrfs_inode_ref_index(node, ref);
4464                 if (index != (u64)-1 && index != ref_index)
4465                         goto next_ref;
4466
4467                 if (ref_namelen <= BTRFS_NAME_LEN) {
4468                         len = ref_namelen;
4469                 } else {
4470                         len = BTRFS_NAME_LEN;
4471                         warning("root %llu INODE %s[%llu %llu] name too long",
4472                                 root->objectid,
4473                                 key->type == BTRFS_INODE_REF_KEY ?
4474                                         "REF" : "EXTREF",
4475                                 key->objectid, key->offset);
4476                 }
4477                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478                                    len);
4479
4480                 if (len != namelen || strncmp(ref_namebuf, name, len))
4481                         goto next_ref;
4482
4483                 ret = 0;
4484                 goto out;
4485 next_ref:
4486                 len = sizeof(*ref) + ref_namelen;
4487                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488                 cur += len;
4489         }
4490
4491 extref:
4492         /* Skip if not support EXTENDED_IREF feature */
4493         if (!ext_ref)
4494                 goto out;
4495
4496         btrfs_release_path(&path);
4497         btrfs_init_path(&path);
4498
4499         dir_id = key->offset;
4500         key->type = BTRFS_INODE_EXTREF_KEY;
4501         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4502
4503         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504         if (ret) {
4505                 ret = INODE_REF_MISSING;
4506                 goto out;
4507         }
4508
4509         node = path.nodes[0];
4510         slot = path.slots[0];
4511
4512         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513         cur = 0;
4514         total = btrfs_item_size_nr(node, slot);
4515
4516         /* Iterate all entry of INODE_EXTREF */
4517         while (cur < total) {
4518                 ret = INODE_REF_MISSING;
4519
4520                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521                 ref_index = btrfs_inode_extref_index(node, extref);
4522                 parent = btrfs_inode_extref_parent(node, extref);
4523                 if (index != (u64)-1 && index != ref_index)
4524                         goto next_extref;
4525
4526                 if (parent != dir_id)
4527                         goto next_extref;
4528
4529                 if (ref_namelen <= BTRFS_NAME_LEN) {
4530                         len = ref_namelen;
4531                 } else {
4532                         len = BTRFS_NAME_LEN;
4533                         warning("root %llu INODE %s[%llu %llu] name too long",
4534                                 root->objectid,
4535                                 key->type == BTRFS_INODE_REF_KEY ?
4536                                         "REF" : "EXTREF",
4537                                 key->objectid, key->offset);
4538                 }
4539                 read_extent_buffer(node, ref_namebuf,
4540                                    (unsigned long)(extref + 1), len);
4541
4542                 if (len != namelen || strncmp(ref_namebuf, name, len))
4543                         goto next_extref;
4544
4545                 ret = 0;
4546                 goto out;
4547
4548 next_extref:
4549                 len = sizeof(*extref) + ref_namelen;
4550                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551                 cur += len;
4552
4553         }
4554 out:
4555         btrfs_release_path(&path);
4556         return ret;
4557 }
4558
4559 /*
4560  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4562  *
4563  * @root:       the root of the fs/file tree
4564  * @key:        the key of the DIR_ITEM/DIR_INDEX
4565  * @size:       the st_size of the INODE_ITEM
4566  * @ext_ref:    the EXTENDED_IREF feature
4567  *
4568  * Return 0 if no error occurred.
4569  */
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571                           struct extent_buffer *node, int slot, u64 *size,
4572                           unsigned int ext_ref)
4573 {
4574         struct btrfs_dir_item *di;
4575         struct btrfs_inode_item *ii;
4576         struct btrfs_path path;
4577         struct btrfs_key location;
4578         char namebuf[BTRFS_NAME_LEN] = {0};
4579         u32 total;
4580         u32 cur = 0;
4581         u32 len;
4582         u32 name_len;
4583         u32 data_len;
4584         u8 filetype;
4585         u32 mode;
4586         u64 index;
4587         int ret;
4588         int err = 0;
4589
4590         /*
4591          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592          * ignore index check.
4593          */
4594         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4595
4596         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597         total = btrfs_item_size_nr(node, slot);
4598
4599         while (cur < total) {
4600                 data_len = btrfs_dir_data_len(node, di);
4601                 if (data_len)
4602                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604                               "DIR_ITEM" : "DIR_INDEX",
4605                               key->objectid, key->offset, data_len);
4606
4607                 name_len = btrfs_dir_name_len(node, di);
4608                 if (name_len <= BTRFS_NAME_LEN) {
4609                         len = name_len;
4610                 } else {
4611                         len = BTRFS_NAME_LEN;
4612                         warning("root %llu %s[%llu %llu] name too long",
4613                                 root->objectid,
4614                                 key->type == BTRFS_DIR_ITEM_KEY ?
4615                                 "DIR_ITEM" : "DIR_INDEX",
4616                                 key->objectid, key->offset);
4617                 }
4618                 (*size) += name_len;
4619
4620                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621                 filetype = btrfs_dir_type(node, di);
4622
4623                 btrfs_init_path(&path);
4624                 btrfs_dir_item_key_to_cpu(node, di, &location);
4625
4626                 /* Ignore related ROOT_ITEM check */
4627                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628                         goto next;
4629
4630                 /* Check relative INODE_ITEM(existence/filetype) */
4631                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632                 if (ret) {
4633                         err |= INODE_ITEM_MISSING;
4634                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637                               key->offset, location.objectid, name_len,
4638                               namebuf, filetype);
4639                         goto next;
4640                 }
4641
4642                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643                                     struct btrfs_inode_item);
4644                 mode = btrfs_inode_mode(path.nodes[0], ii);
4645
4646                 if (imode_to_type(mode) != filetype) {
4647                         err |= INODE_ITEM_MISMATCH;
4648                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651                               key->offset, name_len, namebuf, filetype);
4652                 }
4653
4654                 /* Check relative INODE_REF/INODE_EXTREF */
4655                 location.type = BTRFS_INODE_REF_KEY;
4656                 location.offset = key->objectid;
4657                 ret = find_inode_ref(root, &location, namebuf, len,
4658                                        index, ext_ref);
4659                 err |= ret;
4660                 if (ret & INODE_REF_MISSING)
4661                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664                               key->offset, name_len, namebuf, filetype);
4665
4666 next:
4667                 btrfs_release_path(&path);
4668                 len = sizeof(*di) + name_len + data_len;
4669                 di = (struct btrfs_dir_item *)((char *)di + len);
4670                 cur += len;
4671
4672                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674                               root->objectid, key->objectid, key->offset);
4675                         break;
4676                 }
4677         }
4678
4679         return err;
4680 }
4681
4682 /*
4683  * Check file extent datasum/hole, update the size of the file extents,
4684  * check and update the last offset of the file extent.
4685  *
4686  * @root:       the root of fs/file tree.
4687  * @fkey:       the key of the file extent.
4688  * @nodatasum:  INODE_NODATASUM feature.
4689  * @size:       the sum of all EXTENT_DATA items size for this inode.
4690  * @end:        the offset of the last extent.
4691  *
4692  * Return 0 if no error occurred.
4693  */
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695                              struct extent_buffer *node, int slot,
4696                              unsigned int nodatasum, u64 *size, u64 *end)
4697 {
4698         struct btrfs_file_extent_item *fi;
4699         u64 disk_bytenr;
4700         u64 disk_num_bytes;
4701         u64 extent_num_bytes;
4702         u64 extent_offset;
4703         u64 csum_found;         /* In byte size, sectorsize aligned */
4704         u64 search_start;       /* Logical range start we search for csum */
4705         u64 search_len;         /* Logical range len we search for csum */
4706         unsigned int extent_type;
4707         unsigned int is_hole;
4708         int compressed = 0;
4709         int ret;
4710         int err = 0;
4711
4712         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4713
4714         /* Check inline extent */
4715         extent_type = btrfs_file_extent_type(node, fi);
4716         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4717                 struct btrfs_item *e = btrfs_item_nr(slot);
4718                 u32 item_inline_len;
4719
4720                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4721                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4722                 compressed = btrfs_file_extent_compression(node, fi);
4723                 if (extent_num_bytes == 0) {
4724                         error(
4725                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4726                                 root->objectid, fkey->objectid, fkey->offset);
4727                         err |= FILE_EXTENT_ERROR;
4728                 }
4729                 if (!compressed && extent_num_bytes != item_inline_len) {
4730                         error(
4731                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4732                                 root->objectid, fkey->objectid, fkey->offset,
4733                                 extent_num_bytes, item_inline_len);
4734                         err |= FILE_EXTENT_ERROR;
4735                 }
4736                 *size += extent_num_bytes;
4737                 return err;
4738         }
4739
4740         /* Check extent type */
4741         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4742                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4743                 err |= FILE_EXTENT_ERROR;
4744                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4745                       root->objectid, fkey->objectid, fkey->offset);
4746                 return err;
4747         }
4748
4749         /* Check REG_EXTENT/PREALLOC_EXTENT */
4750         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4751         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4752         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4753         extent_offset = btrfs_file_extent_offset(node, fi);
4754         compressed = btrfs_file_extent_compression(node, fi);
4755         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4756
4757         /*
4758          * Check EXTENT_DATA csum
4759          *
4760          * For plain (uncompressed) extent, we should only check the range
4761          * we're referring to, as it's possible that part of prealloc extent
4762          * has been written, and has csum:
4763          *
4764          * |<--- Original large preallocated extent A ---->|
4765          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4766          *      No csum                         Has csum
4767          *
4768          * For compressed extent, we should check the whole range.
4769          */
4770         if (!compressed) {
4771                 search_start = disk_bytenr + extent_offset;
4772                 search_len = extent_num_bytes;
4773         } else {
4774                 search_start = disk_bytenr;
4775                 search_len = disk_num_bytes;
4776         }
4777         ret = count_csum_range(root, search_start, search_len, &csum_found);
4778         if (csum_found > 0 && nodatasum) {
4779                 err |= ODD_CSUM_ITEM;
4780                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4781                       root->objectid, fkey->objectid, fkey->offset);
4782         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4783                    !is_hole && (ret < 0 || csum_found < search_len)) {
4784                 err |= CSUM_ITEM_MISSING;
4785                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4786                       root->objectid, fkey->objectid, fkey->offset,
4787                       csum_found, search_len);
4788         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4789                 err |= ODD_CSUM_ITEM;
4790                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4791                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4792         }
4793
4794         /* Check EXTENT_DATA hole */
4795         if (no_holes && is_hole) {
4796                 err |= FILE_EXTENT_ERROR;
4797                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4798                       root->objectid, fkey->objectid, fkey->offset);
4799         } else if (!no_holes && *end != fkey->offset) {
4800                 err |= FILE_EXTENT_ERROR;
4801                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4802                       root->objectid, fkey->objectid, fkey->offset);
4803         }
4804
4805         *end += extent_num_bytes;
4806         if (!is_hole)
4807                 *size += extent_num_bytes;
4808
4809         return err;
4810 }
4811
4812 /*
4813  * Check INODE_ITEM and related ITEMs (the same inode number)
4814  * 1. check link count
4815  * 2. check inode ref/extref
4816  * 3. check dir item/index
4817  *
4818  * @ext_ref:    the EXTENDED_IREF feature
4819  *
4820  * Return 0 if no error occurred.
4821  * Return >0 for error or hit the traversal is done(by error bitmap)
4822  */
4823 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4824                             unsigned int ext_ref)
4825 {
4826         struct extent_buffer *node;
4827         struct btrfs_inode_item *ii;
4828         struct btrfs_key key;
4829         u64 inode_id;
4830         u32 mode;
4831         u64 nlink;
4832         u64 nbytes;
4833         u64 isize;
4834         u64 size = 0;
4835         u64 refs = 0;
4836         u64 extent_end = 0;
4837         u64 extent_size = 0;
4838         unsigned int dir;
4839         unsigned int nodatasum;
4840         int slot;
4841         int ret;
4842         int err = 0;
4843
4844         node = path->nodes[0];
4845         slot = path->slots[0];
4846
4847         btrfs_item_key_to_cpu(node, &key, slot);
4848         inode_id = key.objectid;
4849
4850         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4851                 ret = btrfs_next_item(root, path);
4852                 if (ret > 0)
4853                         err |= LAST_ITEM;
4854                 return err;
4855         }
4856
4857         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4858         isize = btrfs_inode_size(node, ii);
4859         nbytes = btrfs_inode_nbytes(node, ii);
4860         mode = btrfs_inode_mode(node, ii);
4861         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4862         nlink = btrfs_inode_nlink(node, ii);
4863         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4864
4865         while (1) {
4866                 ret = btrfs_next_item(root, path);
4867                 if (ret < 0) {
4868                         /* out will fill 'err' rusing current statistics */
4869                         goto out;
4870                 } else if (ret > 0) {
4871                         err |= LAST_ITEM;
4872                         goto out;
4873                 }
4874
4875                 node = path->nodes[0];
4876                 slot = path->slots[0];
4877                 btrfs_item_key_to_cpu(node, &key, slot);
4878                 if (key.objectid != inode_id)
4879                         goto out;
4880
4881                 switch (key.type) {
4882                 case BTRFS_INODE_REF_KEY:
4883                         ret = check_inode_ref(root, &key, node, slot, &refs,
4884                                               mode);
4885                         err |= ret;
4886                         break;
4887                 case BTRFS_INODE_EXTREF_KEY:
4888                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4889                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4890                                         root->objectid, key.objectid,
4891                                         key.offset);
4892                         ret = check_inode_extref(root, &key, node, slot, &refs,
4893                                                  mode);
4894                         err |= ret;
4895                         break;
4896                 case BTRFS_DIR_ITEM_KEY:
4897                 case BTRFS_DIR_INDEX_KEY:
4898                         if (!dir) {
4899                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4900                                         root->objectid, inode_id,
4901                                         imode_to_type(mode), key.objectid,
4902                                         key.offset);
4903                         }
4904                         ret = check_dir_item(root, &key, node, slot, &size,
4905                                              ext_ref);
4906                         err |= ret;
4907                         break;
4908                 case BTRFS_EXTENT_DATA_KEY:
4909                         if (dir) {
4910                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4911                                         root->objectid, inode_id, key.objectid,
4912                                         key.offset);
4913                         }
4914                         ret = check_file_extent(root, &key, node, slot,
4915                                                 nodatasum, &extent_size,
4916                                                 &extent_end);
4917                         err |= ret;
4918                         break;
4919                 case BTRFS_XATTR_ITEM_KEY:
4920                         break;
4921                 default:
4922                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4923                               key.objectid, key.type, key.offset);
4924                 }
4925         }
4926
4927 out:
4928         /* verify INODE_ITEM nlink/isize/nbytes */
4929         if (dir) {
4930                 if (nlink != 1) {
4931                         err |= LINK_COUNT_ERROR;
4932                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4933                               root->objectid, inode_id, nlink);
4934                 }
4935
4936                 /*
4937                  * Just a warning, as dir inode nbytes is just an
4938                  * instructive value.
4939                  */
4940                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4941                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4942                                 root->objectid, inode_id, root->nodesize);
4943                 }
4944
4945                 if (isize != size) {
4946                         err |= ISIZE_ERROR;
4947                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4948                               root->objectid, inode_id, isize, size);
4949                 }
4950         } else {
4951                 if (nlink != refs) {
4952                         err |= LINK_COUNT_ERROR;
4953                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4954                               root->objectid, inode_id, nlink, refs);
4955                 } else if (!nlink) {
4956                         err |= ORPHAN_ITEM;
4957                 }
4958
4959                 if (!nbytes && !no_holes && extent_end < isize) {
4960                         err |= NBYTES_ERROR;
4961                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4962                               root->objectid, inode_id, isize);
4963                 }
4964
4965                 if (nbytes != extent_size) {
4966                         err |= NBYTES_ERROR;
4967                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4968                               root->objectid, inode_id, nbytes, extent_size);
4969                 }
4970         }
4971
4972         return err;
4973 }
4974
4975 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4976 {
4977         struct btrfs_path path;
4978         struct btrfs_key key;
4979         int err = 0;
4980         int ret;
4981
4982         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4983         key.type = BTRFS_INODE_ITEM_KEY;
4984         key.offset = 0;
4985
4986         /* For root being dropped, we don't need to check first inode */
4987         if (btrfs_root_refs(&root->root_item) == 0 &&
4988             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4989             key.objectid)
4990                 return 0;
4991
4992         btrfs_init_path(&path);
4993
4994         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4995         if (ret < 0)
4996                 goto out;
4997         if (ret > 0) {
4998                 ret = 0;
4999                 err |= INODE_ITEM_MISSING;
5000         }
5001
5002         err |= check_inode_item(root, &path, ext_ref);
5003         err &= ~LAST_ITEM;
5004         if (err && !ret)
5005                 ret = -EIO;
5006 out:
5007         btrfs_release_path(&path);
5008         return ret;
5009 }
5010
5011 /*
5012  * Iterate all item on the tree and call check_inode_item() to check.
5013  *
5014  * @root:       the root of the tree to be checked.
5015  * @ext_ref:    the EXTENDED_IREF feature
5016  *
5017  * Return 0 if no error found.
5018  * Return <0 for error.
5019  */
5020 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5021 {
5022         struct btrfs_path path;
5023         struct node_refs nrefs;
5024         struct btrfs_root_item *root_item = &root->root_item;
5025         int ret, wret;
5026         int level;
5027
5028         /*
5029          * We need to manually check the first inode item(256)
5030          * As the following traversal function will only start from
5031          * the first inode item in the leaf, if inode item(256) is missing
5032          * we will just skip it forever.
5033          */
5034         ret = check_fs_first_inode(root, ext_ref);
5035         if (ret < 0)
5036                 return ret;
5037
5038         memset(&nrefs, 0, sizeof(nrefs));
5039         level = btrfs_header_level(root->node);
5040         btrfs_init_path(&path);
5041
5042         if (btrfs_root_refs(root_item) > 0 ||
5043             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5044                 path.nodes[level] = root->node;
5045                 path.slots[level] = 0;
5046                 extent_buffer_get(root->node);
5047         } else {
5048                 struct btrfs_key key;
5049
5050                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5051                 level = root_item->drop_level;
5052                 path.lowest_level = level;
5053                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5054                 if (ret < 0)
5055                         goto out;
5056                 ret = 0;
5057         }
5058
5059         while (1) {
5060                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5061                 if (wret < 0)
5062                         ret = wret;
5063                 if (wret != 0)
5064                         break;
5065
5066                 wret = walk_up_tree_v2(root, &path, &level);
5067                 if (wret < 0)
5068                         ret = wret;
5069                 if (wret != 0)
5070                         break;
5071         }
5072
5073 out:
5074         btrfs_release_path(&path);
5075         return ret;
5076 }
5077
5078 /*
5079  * Find the relative ref for root_ref and root_backref.
5080  *
5081  * @root:       the root of the root tree.
5082  * @ref_key:    the key of the root ref.
5083  *
5084  * Return 0 if no error occurred.
5085  */
5086 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5087                           struct extent_buffer *node, int slot)
5088 {
5089         struct btrfs_path path;
5090         struct btrfs_key key;
5091         struct btrfs_root_ref *ref;
5092         struct btrfs_root_ref *backref;
5093         char ref_name[BTRFS_NAME_LEN] = {0};
5094         char backref_name[BTRFS_NAME_LEN] = {0};
5095         u64 ref_dirid;
5096         u64 ref_seq;
5097         u32 ref_namelen;
5098         u64 backref_dirid;
5099         u64 backref_seq;
5100         u32 backref_namelen;
5101         u32 len;
5102         int ret;
5103         int err = 0;
5104
5105         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5106         ref_dirid = btrfs_root_ref_dirid(node, ref);
5107         ref_seq = btrfs_root_ref_sequence(node, ref);
5108         ref_namelen = btrfs_root_ref_name_len(node, ref);
5109
5110         if (ref_namelen <= BTRFS_NAME_LEN) {
5111                 len = ref_namelen;
5112         } else {
5113                 len = BTRFS_NAME_LEN;
5114                 warning("%s[%llu %llu] ref_name too long",
5115                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5116                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5117                         ref_key->offset);
5118         }
5119         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5120
5121         /* Find relative root_ref */
5122         key.objectid = ref_key->offset;
5123         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5124         key.offset = ref_key->objectid;
5125
5126         btrfs_init_path(&path);
5127         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5128         if (ret) {
5129                 err |= ROOT_REF_MISSING;
5130                 error("%s[%llu %llu] couldn't find relative ref",
5131                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5132                       "ROOT_REF" : "ROOT_BACKREF",
5133                       ref_key->objectid, ref_key->offset);
5134                 goto out;
5135         }
5136
5137         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5138                                  struct btrfs_root_ref);
5139         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5140         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5141         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5142
5143         if (backref_namelen <= BTRFS_NAME_LEN) {
5144                 len = backref_namelen;
5145         } else {
5146                 len = BTRFS_NAME_LEN;
5147                 warning("%s[%llu %llu] ref_name too long",
5148                         key.type == BTRFS_ROOT_REF_KEY ?
5149                         "ROOT_REF" : "ROOT_BACKREF",
5150                         key.objectid, key.offset);
5151         }
5152         read_extent_buffer(path.nodes[0], backref_name,
5153                            (unsigned long)(backref + 1), len);
5154
5155         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5156             ref_namelen != backref_namelen ||
5157             strncmp(ref_name, backref_name, len)) {
5158                 err |= ROOT_REF_MISMATCH;
5159                 error("%s[%llu %llu] mismatch relative ref",
5160                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5161                       "ROOT_REF" : "ROOT_BACKREF",
5162                       ref_key->objectid, ref_key->offset);
5163         }
5164 out:
5165         btrfs_release_path(&path);
5166         return err;
5167 }
5168
5169 /*
5170  * Check all fs/file tree in low_memory mode.
5171  *
5172  * 1. for fs tree root item, call check_fs_root_v2()
5173  * 2. for fs tree root ref/backref, call check_root_ref()
5174  *
5175  * Return 0 if no error occurred.
5176  */
5177 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5178 {
5179         struct btrfs_root *tree_root = fs_info->tree_root;
5180         struct btrfs_root *cur_root = NULL;
5181         struct btrfs_path path;
5182         struct btrfs_key key;
5183         struct extent_buffer *node;
5184         unsigned int ext_ref;
5185         int slot;
5186         int ret;
5187         int err = 0;
5188
5189         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5190
5191         btrfs_init_path(&path);
5192         key.objectid = BTRFS_FS_TREE_OBJECTID;
5193         key.offset = 0;
5194         key.type = BTRFS_ROOT_ITEM_KEY;
5195
5196         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5197         if (ret < 0) {
5198                 err = ret;
5199                 goto out;
5200         } else if (ret > 0) {
5201                 err = -ENOENT;
5202                 goto out;
5203         }
5204
5205         while (1) {
5206                 node = path.nodes[0];
5207                 slot = path.slots[0];
5208                 btrfs_item_key_to_cpu(node, &key, slot);
5209                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5210                         goto out;
5211                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5212                     fs_root_objectid(key.objectid)) {
5213                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5214                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5215                                                                        &key);
5216                         } else {
5217                                 key.offset = (u64)-1;
5218                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5219                         }
5220
5221                         if (IS_ERR(cur_root)) {
5222                                 error("Fail to read fs/subvol tree: %lld",
5223                                       key.objectid);
5224                                 err = -EIO;
5225                                 goto next;
5226                         }
5227
5228                         ret = check_fs_root_v2(cur_root, ext_ref);
5229                         err |= ret;
5230
5231                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5232                                 btrfs_free_fs_root(cur_root);
5233                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5234                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5235                         ret = check_root_ref(tree_root, &key, node, slot);
5236                         err |= ret;
5237                 }
5238 next:
5239                 ret = btrfs_next_item(tree_root, &path);
5240                 if (ret > 0)
5241                         goto out;
5242                 if (ret < 0) {
5243                         err = ret;
5244                         goto out;
5245                 }
5246         }
5247
5248 out:
5249         btrfs_release_path(&path);
5250         return err;
5251 }
5252
5253 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5254 {
5255         struct list_head *cur = rec->backrefs.next;
5256         struct extent_backref *back;
5257         struct tree_backref *tback;
5258         struct data_backref *dback;
5259         u64 found = 0;
5260         int err = 0;
5261
5262         while(cur != &rec->backrefs) {
5263                 back = to_extent_backref(cur);
5264                 cur = cur->next;
5265                 if (!back->found_extent_tree) {
5266                         err = 1;
5267                         if (!print_errs)
5268                                 goto out;
5269                         if (back->is_data) {
5270                                 dback = to_data_backref(back);
5271                                 fprintf(stderr, "Backref %llu %s %llu"
5272                                         " owner %llu offset %llu num_refs %lu"
5273                                         " not found in extent tree\n",
5274                                         (unsigned long long)rec->start,
5275                                         back->full_backref ?
5276                                         "parent" : "root",
5277                                         back->full_backref ?
5278                                         (unsigned long long)dback->parent:
5279                                         (unsigned long long)dback->root,
5280                                         (unsigned long long)dback->owner,
5281                                         (unsigned long long)dback->offset,
5282                                         (unsigned long)dback->num_refs);
5283                         } else {
5284                                 tback = to_tree_backref(back);
5285                                 fprintf(stderr, "Backref %llu parent %llu"
5286                                         " root %llu not found in extent tree\n",
5287                                         (unsigned long long)rec->start,
5288                                         (unsigned long long)tback->parent,
5289                                         (unsigned long long)tback->root);
5290                         }
5291                 }
5292                 if (!back->is_data && !back->found_ref) {
5293                         err = 1;
5294                         if (!print_errs)
5295                                 goto out;
5296                         tback = to_tree_backref(back);
5297                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5298                                 (unsigned long long)rec->start,
5299                                 back->full_backref ? "parent" : "root",
5300                                 back->full_backref ?
5301                                 (unsigned long long)tback->parent :
5302                                 (unsigned long long)tback->root, back);
5303                 }
5304                 if (back->is_data) {
5305                         dback = to_data_backref(back);
5306                         if (dback->found_ref != dback->num_refs) {
5307                                 err = 1;
5308                                 if (!print_errs)
5309                                         goto out;
5310                                 fprintf(stderr, "Incorrect local backref count"
5311                                         " on %llu %s %llu owner %llu"
5312                                         " offset %llu found %u wanted %u back %p\n",
5313                                         (unsigned long long)rec->start,
5314                                         back->full_backref ?
5315                                         "parent" : "root",
5316                                         back->full_backref ?
5317                                         (unsigned long long)dback->parent:
5318                                         (unsigned long long)dback->root,
5319                                         (unsigned long long)dback->owner,
5320                                         (unsigned long long)dback->offset,
5321                                         dback->found_ref, dback->num_refs, back);
5322                         }
5323                         if (dback->disk_bytenr != rec->start) {
5324                                 err = 1;
5325                                 if (!print_errs)
5326                                         goto out;
5327                                 fprintf(stderr, "Backref disk bytenr does not"
5328                                         " match extent record, bytenr=%llu, "
5329                                         "ref bytenr=%llu\n",
5330                                         (unsigned long long)rec->start,
5331                                         (unsigned long long)dback->disk_bytenr);
5332                         }
5333
5334                         if (dback->bytes != rec->nr) {
5335                                 err = 1;
5336                                 if (!print_errs)
5337                                         goto out;
5338                                 fprintf(stderr, "Backref bytes do not match "
5339                                         "extent backref, bytenr=%llu, ref "
5340                                         "bytes=%llu, backref bytes=%llu\n",
5341                                         (unsigned long long)rec->start,
5342                                         (unsigned long long)rec->nr,
5343                                         (unsigned long long)dback->bytes);
5344                         }
5345                 }
5346                 if (!back->is_data) {
5347                         found += 1;
5348                 } else {
5349                         dback = to_data_backref(back);
5350                         found += dback->found_ref;
5351                 }
5352         }
5353         if (found != rec->refs) {
5354                 err = 1;
5355                 if (!print_errs)
5356                         goto out;
5357                 fprintf(stderr, "Incorrect global backref count "
5358                         "on %llu found %llu wanted %llu\n",
5359                         (unsigned long long)rec->start,
5360                         (unsigned long long)found,
5361                         (unsigned long long)rec->refs);
5362         }
5363 out:
5364         return err;
5365 }
5366
5367 static int free_all_extent_backrefs(struct extent_record *rec)
5368 {
5369         struct extent_backref *back;
5370         struct list_head *cur;
5371         while (!list_empty(&rec->backrefs)) {
5372                 cur = rec->backrefs.next;
5373                 back = to_extent_backref(cur);
5374                 list_del(cur);
5375                 free(back);
5376         }
5377         return 0;
5378 }
5379
5380 static void free_extent_record_cache(struct cache_tree *extent_cache)
5381 {
5382         struct cache_extent *cache;
5383         struct extent_record *rec;
5384
5385         while (1) {
5386                 cache = first_cache_extent(extent_cache);
5387                 if (!cache)
5388                         break;
5389                 rec = container_of(cache, struct extent_record, cache);
5390                 remove_cache_extent(extent_cache, cache);
5391                 free_all_extent_backrefs(rec);
5392                 free(rec);
5393         }
5394 }
5395
5396 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5397                                  struct extent_record *rec)
5398 {
5399         if (rec->content_checked && rec->owner_ref_checked &&
5400             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5401             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5402             !rec->bad_full_backref && !rec->crossing_stripes &&
5403             !rec->wrong_chunk_type) {
5404                 remove_cache_extent(extent_cache, &rec->cache);
5405                 free_all_extent_backrefs(rec);
5406                 list_del_init(&rec->list);
5407                 free(rec);
5408         }
5409         return 0;
5410 }
5411
5412 static int check_owner_ref(struct btrfs_root *root,
5413                             struct extent_record *rec,
5414                             struct extent_buffer *buf)
5415 {
5416         struct extent_backref *node;
5417         struct tree_backref *back;
5418         struct btrfs_root *ref_root;
5419         struct btrfs_key key;
5420         struct btrfs_path path;
5421         struct extent_buffer *parent;
5422         int level;
5423         int found = 0;
5424         int ret;
5425
5426         list_for_each_entry(node, &rec->backrefs, list) {
5427                 if (node->is_data)
5428                         continue;
5429                 if (!node->found_ref)
5430                         continue;
5431                 if (node->full_backref)
5432                         continue;
5433                 back = to_tree_backref(node);
5434                 if (btrfs_header_owner(buf) == back->root)
5435                         return 0;
5436         }
5437         BUG_ON(rec->is_root);
5438
5439         /* try to find the block by search corresponding fs tree */
5440         key.objectid = btrfs_header_owner(buf);
5441         key.type = BTRFS_ROOT_ITEM_KEY;
5442         key.offset = (u64)-1;
5443
5444         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5445         if (IS_ERR(ref_root))
5446                 return 1;
5447
5448         level = btrfs_header_level(buf);
5449         if (level == 0)
5450                 btrfs_item_key_to_cpu(buf, &key, 0);
5451         else
5452                 btrfs_node_key_to_cpu(buf, &key, 0);
5453
5454         btrfs_init_path(&path);
5455         path.lowest_level = level + 1;
5456         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5457         if (ret < 0)
5458                 return 0;
5459
5460         parent = path.nodes[level + 1];
5461         if (parent && buf->start == btrfs_node_blockptr(parent,
5462                                                         path.slots[level + 1]))
5463                 found = 1;
5464
5465         btrfs_release_path(&path);
5466         return found ? 0 : 1;
5467 }
5468
5469 static int is_extent_tree_record(struct extent_record *rec)
5470 {
5471         struct list_head *cur = rec->backrefs.next;
5472         struct extent_backref *node;
5473         struct tree_backref *back;
5474         int is_extent = 0;
5475
5476         while(cur != &rec->backrefs) {
5477                 node = to_extent_backref(cur);
5478                 cur = cur->next;
5479                 if (node->is_data)
5480                         return 0;
5481                 back = to_tree_backref(node);
5482                 if (node->full_backref)
5483                         return 0;
5484                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5485                         is_extent = 1;
5486         }
5487         return is_extent;
5488 }
5489
5490
5491 static int record_bad_block_io(struct btrfs_fs_info *info,
5492                                struct cache_tree *extent_cache,
5493                                u64 start, u64 len)
5494 {
5495         struct extent_record *rec;
5496         struct cache_extent *cache;
5497         struct btrfs_key key;
5498
5499         cache = lookup_cache_extent(extent_cache, start, len);
5500         if (!cache)
5501                 return 0;
5502
5503         rec = container_of(cache, struct extent_record, cache);
5504         if (!is_extent_tree_record(rec))
5505                 return 0;
5506
5507         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5508         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5509 }
5510
5511 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5512                        struct extent_buffer *buf, int slot)
5513 {
5514         if (btrfs_header_level(buf)) {
5515                 struct btrfs_key_ptr ptr1, ptr2;
5516
5517                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5518                                    sizeof(struct btrfs_key_ptr));
5519                 read_extent_buffer(buf, &ptr2,
5520                                    btrfs_node_key_ptr_offset(slot + 1),
5521                                    sizeof(struct btrfs_key_ptr));
5522                 write_extent_buffer(buf, &ptr1,
5523                                     btrfs_node_key_ptr_offset(slot + 1),
5524                                     sizeof(struct btrfs_key_ptr));
5525                 write_extent_buffer(buf, &ptr2,
5526                                     btrfs_node_key_ptr_offset(slot),
5527                                     sizeof(struct btrfs_key_ptr));
5528                 if (slot == 0) {
5529                         struct btrfs_disk_key key;
5530                         btrfs_node_key(buf, &key, 0);
5531                         btrfs_fixup_low_keys(root, path, &key,
5532                                              btrfs_header_level(buf) + 1);
5533                 }
5534         } else {
5535                 struct btrfs_item *item1, *item2;
5536                 struct btrfs_key k1, k2;
5537                 char *item1_data, *item2_data;
5538                 u32 item1_offset, item2_offset, item1_size, item2_size;
5539
5540                 item1 = btrfs_item_nr(slot);
5541                 item2 = btrfs_item_nr(slot + 1);
5542                 btrfs_item_key_to_cpu(buf, &k1, slot);
5543                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5544                 item1_offset = btrfs_item_offset(buf, item1);
5545                 item2_offset = btrfs_item_offset(buf, item2);
5546                 item1_size = btrfs_item_size(buf, item1);
5547                 item2_size = btrfs_item_size(buf, item2);
5548
5549                 item1_data = malloc(item1_size);
5550                 if (!item1_data)
5551                         return -ENOMEM;
5552                 item2_data = malloc(item2_size);
5553                 if (!item2_data) {
5554                         free(item1_data);
5555                         return -ENOMEM;
5556                 }
5557
5558                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5559                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5560
5561                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5562                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5563                 free(item1_data);
5564                 free(item2_data);
5565
5566                 btrfs_set_item_offset(buf, item1, item2_offset);
5567                 btrfs_set_item_offset(buf, item2, item1_offset);
5568                 btrfs_set_item_size(buf, item1, item2_size);
5569                 btrfs_set_item_size(buf, item2, item1_size);
5570
5571                 path->slots[0] = slot;
5572                 btrfs_set_item_key_unsafe(root, path, &k2);
5573                 path->slots[0] = slot + 1;
5574                 btrfs_set_item_key_unsafe(root, path, &k1);
5575         }
5576         return 0;
5577 }
5578
5579 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5580 {
5581         struct extent_buffer *buf;
5582         struct btrfs_key k1, k2;
5583         int i;
5584         int level = path->lowest_level;
5585         int ret = -EIO;
5586
5587         buf = path->nodes[level];
5588         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5589                 if (level) {
5590                         btrfs_node_key_to_cpu(buf, &k1, i);
5591                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5592                 } else {
5593                         btrfs_item_key_to_cpu(buf, &k1, i);
5594                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5595                 }
5596                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5597                         continue;
5598                 ret = swap_values(root, path, buf, i);
5599                 if (ret)
5600                         break;
5601                 btrfs_mark_buffer_dirty(buf);
5602                 i = 0;
5603         }
5604         return ret;
5605 }
5606
5607 static int delete_bogus_item(struct btrfs_root *root,
5608                              struct btrfs_path *path,
5609                              struct extent_buffer *buf, int slot)
5610 {
5611         struct btrfs_key key;
5612         int nritems = btrfs_header_nritems(buf);
5613
5614         btrfs_item_key_to_cpu(buf, &key, slot);
5615
5616         /* These are all the keys we can deal with missing. */
5617         if (key.type != BTRFS_DIR_INDEX_KEY &&
5618             key.type != BTRFS_EXTENT_ITEM_KEY &&
5619             key.type != BTRFS_METADATA_ITEM_KEY &&
5620             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5621             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5622                 return -1;
5623
5624         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5625                (unsigned long long)key.objectid, key.type,
5626                (unsigned long long)key.offset, slot, buf->start);
5627         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5628                               btrfs_item_nr_offset(slot + 1),
5629                               sizeof(struct btrfs_item) *
5630                               (nritems - slot - 1));
5631         btrfs_set_header_nritems(buf, nritems - 1);
5632         if (slot == 0) {
5633                 struct btrfs_disk_key disk_key;
5634
5635                 btrfs_item_key(buf, &disk_key, 0);
5636                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5637         }
5638         btrfs_mark_buffer_dirty(buf);
5639         return 0;
5640 }
5641
5642 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5643 {
5644         struct extent_buffer *buf;
5645         int i;
5646         int ret = 0;
5647
5648         /* We should only get this for leaves */
5649         BUG_ON(path->lowest_level);
5650         buf = path->nodes[0];
5651 again:
5652         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5653                 unsigned int shift = 0, offset;
5654
5655                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5656                     BTRFS_LEAF_DATA_SIZE(root)) {
5657                         if (btrfs_item_end_nr(buf, i) >
5658                             BTRFS_LEAF_DATA_SIZE(root)) {
5659                                 ret = delete_bogus_item(root, path, buf, i);
5660                                 if (!ret)
5661                                         goto again;
5662                                 fprintf(stderr, "item is off the end of the "
5663                                         "leaf, can't fix\n");
5664                                 ret = -EIO;
5665                                 break;
5666                         }
5667                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5668                                 btrfs_item_end_nr(buf, i);
5669                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5670                            btrfs_item_offset_nr(buf, i - 1)) {
5671                         if (btrfs_item_end_nr(buf, i) >
5672                             btrfs_item_offset_nr(buf, i - 1)) {
5673                                 ret = delete_bogus_item(root, path, buf, i);
5674                                 if (!ret)
5675                                         goto again;
5676                                 fprintf(stderr, "items overlap, can't fix\n");
5677                                 ret = -EIO;
5678                                 break;
5679                         }
5680                         shift = btrfs_item_offset_nr(buf, i - 1) -
5681                                 btrfs_item_end_nr(buf, i);
5682                 }
5683                 if (!shift)
5684                         continue;
5685
5686                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5687                        i, shift, (unsigned long long)buf->start);
5688                 offset = btrfs_item_offset_nr(buf, i);
5689                 memmove_extent_buffer(buf,
5690                                       btrfs_leaf_data(buf) + offset + shift,
5691                                       btrfs_leaf_data(buf) + offset,
5692                                       btrfs_item_size_nr(buf, i));
5693                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5694                                       offset + shift);
5695                 btrfs_mark_buffer_dirty(buf);
5696         }
5697
5698         /*
5699          * We may have moved things, in which case we want to exit so we don't
5700          * write those changes out.  Once we have proper abort functionality in
5701          * progs this can be changed to something nicer.
5702          */
5703         BUG_ON(ret);
5704         return ret;
5705 }
5706
5707 /*
5708  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5709  * then just return -EIO.
5710  */
5711 static int try_to_fix_bad_block(struct btrfs_root *root,
5712                                 struct extent_buffer *buf,
5713                                 enum btrfs_tree_block_status status)
5714 {
5715         struct btrfs_trans_handle *trans;
5716         struct ulist *roots;
5717         struct ulist_node *node;
5718         struct btrfs_root *search_root;
5719         struct btrfs_path path;
5720         struct ulist_iterator iter;
5721         struct btrfs_key root_key, key;
5722         int ret;
5723
5724         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5725             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5726                 return -EIO;
5727
5728         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5729         if (ret)
5730                 return -EIO;
5731
5732         btrfs_init_path(&path);
5733         ULIST_ITER_INIT(&iter);
5734         while ((node = ulist_next(roots, &iter))) {
5735                 root_key.objectid = node->val;
5736                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5737                 root_key.offset = (u64)-1;
5738
5739                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5740                 if (IS_ERR(root)) {
5741                         ret = -EIO;
5742                         break;
5743                 }
5744
5745
5746                 trans = btrfs_start_transaction(search_root, 0);
5747                 if (IS_ERR(trans)) {
5748                         ret = PTR_ERR(trans);
5749                         break;
5750                 }
5751
5752                 path.lowest_level = btrfs_header_level(buf);
5753                 path.skip_check_block = 1;
5754                 if (path.lowest_level)
5755                         btrfs_node_key_to_cpu(buf, &key, 0);
5756                 else
5757                         btrfs_item_key_to_cpu(buf, &key, 0);
5758                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5759                 if (ret) {
5760                         ret = -EIO;
5761                         btrfs_commit_transaction(trans, search_root);
5762                         break;
5763                 }
5764                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5765                         ret = fix_key_order(search_root, &path);
5766                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5767                         ret = fix_item_offset(search_root, &path);
5768                 if (ret) {
5769                         btrfs_commit_transaction(trans, search_root);
5770                         break;
5771                 }
5772                 btrfs_release_path(&path);
5773                 btrfs_commit_transaction(trans, search_root);
5774         }
5775         ulist_free(roots);
5776         btrfs_release_path(&path);
5777         return ret;
5778 }
5779
5780 static int check_block(struct btrfs_root *root,
5781                        struct cache_tree *extent_cache,
5782                        struct extent_buffer *buf, u64 flags)
5783 {
5784         struct extent_record *rec;
5785         struct cache_extent *cache;
5786         struct btrfs_key key;
5787         enum btrfs_tree_block_status status;
5788         int ret = 0;
5789         int level;
5790
5791         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5792         if (!cache)
5793                 return 1;
5794         rec = container_of(cache, struct extent_record, cache);
5795         rec->generation = btrfs_header_generation(buf);
5796
5797         level = btrfs_header_level(buf);
5798         if (btrfs_header_nritems(buf) > 0) {
5799
5800                 if (level == 0)
5801                         btrfs_item_key_to_cpu(buf, &key, 0);
5802                 else
5803                         btrfs_node_key_to_cpu(buf, &key, 0);
5804
5805                 rec->info_objectid = key.objectid;
5806         }
5807         rec->info_level = level;
5808
5809         if (btrfs_is_leaf(buf))
5810                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5811         else
5812                 status = btrfs_check_node(root, &rec->parent_key, buf);
5813
5814         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5815                 if (repair)
5816                         status = try_to_fix_bad_block(root, buf, status);
5817                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5818                         ret = -EIO;
5819                         fprintf(stderr, "bad block %llu\n",
5820                                 (unsigned long long)buf->start);
5821                 } else {
5822                         /*
5823                          * Signal to callers we need to start the scan over
5824                          * again since we'll have cowed blocks.
5825                          */
5826                         ret = -EAGAIN;
5827                 }
5828         } else {
5829                 rec->content_checked = 1;
5830                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5831                         rec->owner_ref_checked = 1;
5832                 else {
5833                         ret = check_owner_ref(root, rec, buf);
5834                         if (!ret)
5835                                 rec->owner_ref_checked = 1;
5836                 }
5837         }
5838         if (!ret)
5839                 maybe_free_extent_rec(extent_cache, rec);
5840         return ret;
5841 }
5842
5843 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5844                                                 u64 parent, u64 root)
5845 {
5846         struct list_head *cur = rec->backrefs.next;
5847         struct extent_backref *node;
5848         struct tree_backref *back;
5849
5850         while(cur != &rec->backrefs) {
5851                 node = to_extent_backref(cur);
5852                 cur = cur->next;
5853                 if (node->is_data)
5854                         continue;
5855                 back = to_tree_backref(node);
5856                 if (parent > 0) {
5857                         if (!node->full_backref)
5858                                 continue;
5859                         if (parent == back->parent)
5860                                 return back;
5861                 } else {
5862                         if (node->full_backref)
5863                                 continue;
5864                         if (back->root == root)
5865                                 return back;
5866                 }
5867         }
5868         return NULL;
5869 }
5870
5871 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5872                                                 u64 parent, u64 root)
5873 {
5874         struct tree_backref *ref = malloc(sizeof(*ref));
5875
5876         if (!ref)
5877                 return NULL;
5878         memset(&ref->node, 0, sizeof(ref->node));
5879         if (parent > 0) {
5880                 ref->parent = parent;
5881                 ref->node.full_backref = 1;
5882         } else {
5883                 ref->root = root;
5884                 ref->node.full_backref = 0;
5885         }
5886         list_add_tail(&ref->node.list, &rec->backrefs);
5887
5888         return ref;
5889 }
5890
5891 static struct data_backref *find_data_backref(struct extent_record *rec,
5892                                                 u64 parent, u64 root,
5893                                                 u64 owner, u64 offset,
5894                                                 int found_ref,
5895                                                 u64 disk_bytenr, u64 bytes)
5896 {
5897         struct list_head *cur = rec->backrefs.next;
5898         struct extent_backref *node;
5899         struct data_backref *back;
5900
5901         while(cur != &rec->backrefs) {
5902                 node = to_extent_backref(cur);
5903                 cur = cur->next;
5904                 if (!node->is_data)
5905                         continue;
5906                 back = to_data_backref(node);
5907                 if (parent > 0) {
5908                         if (!node->full_backref)
5909                                 continue;
5910                         if (parent == back->parent)
5911                                 return back;
5912                 } else {
5913                         if (node->full_backref)
5914                                 continue;
5915                         if (back->root == root && back->owner == owner &&
5916                             back->offset == offset) {
5917                                 if (found_ref && node->found_ref &&
5918                                     (back->bytes != bytes ||
5919                                     back->disk_bytenr != disk_bytenr))
5920                                         continue;
5921                                 return back;
5922                         }
5923                 }
5924         }
5925         return NULL;
5926 }
5927
5928 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5929                                                 u64 parent, u64 root,
5930                                                 u64 owner, u64 offset,
5931                                                 u64 max_size)
5932 {
5933         struct data_backref *ref = malloc(sizeof(*ref));
5934
5935         if (!ref)
5936                 return NULL;
5937         memset(&ref->node, 0, sizeof(ref->node));
5938         ref->node.is_data = 1;
5939
5940         if (parent > 0) {
5941                 ref->parent = parent;
5942                 ref->owner = 0;
5943                 ref->offset = 0;
5944                 ref->node.full_backref = 1;
5945         } else {
5946                 ref->root = root;
5947                 ref->owner = owner;
5948                 ref->offset = offset;
5949                 ref->node.full_backref = 0;
5950         }
5951         ref->bytes = max_size;
5952         ref->found_ref = 0;
5953         ref->num_refs = 0;
5954         list_add_tail(&ref->node.list, &rec->backrefs);
5955         if (max_size > rec->max_size)
5956                 rec->max_size = max_size;
5957         return ref;
5958 }
5959
5960 /* Check if the type of extent matches with its chunk */
5961 static void check_extent_type(struct extent_record *rec)
5962 {
5963         struct btrfs_block_group_cache *bg_cache;
5964
5965         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5966         if (!bg_cache)
5967                 return;
5968
5969         /* data extent, check chunk directly*/
5970         if (!rec->metadata) {
5971                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5972                         rec->wrong_chunk_type = 1;
5973                 return;
5974         }
5975
5976         /* metadata extent, check the obvious case first */
5977         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5978                                  BTRFS_BLOCK_GROUP_METADATA))) {
5979                 rec->wrong_chunk_type = 1;
5980                 return;
5981         }
5982
5983         /*
5984          * Check SYSTEM extent, as it's also marked as metadata, we can only
5985          * make sure it's a SYSTEM extent by its backref
5986          */
5987         if (!list_empty(&rec->backrefs)) {
5988                 struct extent_backref *node;
5989                 struct tree_backref *tback;
5990                 u64 bg_type;
5991
5992                 node = to_extent_backref(rec->backrefs.next);
5993                 if (node->is_data) {
5994                         /* tree block shouldn't have data backref */
5995                         rec->wrong_chunk_type = 1;
5996                         return;
5997                 }
5998                 tback = container_of(node, struct tree_backref, node);
5999
6000                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6001                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6002                 else
6003                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6004                 if (!(bg_cache->flags & bg_type))
6005                         rec->wrong_chunk_type = 1;
6006         }
6007 }
6008
6009 /*
6010  * Allocate a new extent record, fill default values from @tmpl and insert int
6011  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6012  * the cache, otherwise it fails.
6013  */
6014 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6015                 struct extent_record *tmpl)
6016 {
6017         struct extent_record *rec;
6018         int ret = 0;
6019
6020         rec = malloc(sizeof(*rec));
6021         if (!rec)
6022                 return -ENOMEM;
6023         rec->start = tmpl->start;
6024         rec->max_size = tmpl->max_size;
6025         rec->nr = max(tmpl->nr, tmpl->max_size);
6026         rec->found_rec = tmpl->found_rec;
6027         rec->content_checked = tmpl->content_checked;
6028         rec->owner_ref_checked = tmpl->owner_ref_checked;
6029         rec->num_duplicates = 0;
6030         rec->metadata = tmpl->metadata;
6031         rec->flag_block_full_backref = FLAG_UNSET;
6032         rec->bad_full_backref = 0;
6033         rec->crossing_stripes = 0;
6034         rec->wrong_chunk_type = 0;
6035         rec->is_root = tmpl->is_root;
6036         rec->refs = tmpl->refs;
6037         rec->extent_item_refs = tmpl->extent_item_refs;
6038         rec->parent_generation = tmpl->parent_generation;
6039         INIT_LIST_HEAD(&rec->backrefs);
6040         INIT_LIST_HEAD(&rec->dups);
6041         INIT_LIST_HEAD(&rec->list);
6042         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6043         rec->cache.start = tmpl->start;
6044         rec->cache.size = tmpl->nr;
6045         ret = insert_cache_extent(extent_cache, &rec->cache);
6046         if (ret) {
6047                 free(rec);
6048                 return ret;
6049         }
6050         bytes_used += rec->nr;
6051
6052         if (tmpl->metadata)
6053                 rec->crossing_stripes = check_crossing_stripes(global_info,
6054                                 rec->start, global_info->tree_root->nodesize);
6055         check_extent_type(rec);
6056         return ret;
6057 }
6058
6059 /*
6060  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6061  * some are hints:
6062  * - refs              - if found, increase refs
6063  * - is_root           - if found, set
6064  * - content_checked   - if found, set
6065  * - owner_ref_checked - if found, set
6066  *
6067  * If not found, create a new one, initialize and insert.
6068  */
6069 static int add_extent_rec(struct cache_tree *extent_cache,
6070                 struct extent_record *tmpl)
6071 {
6072         struct extent_record *rec;
6073         struct cache_extent *cache;
6074         int ret = 0;
6075         int dup = 0;
6076
6077         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6078         if (cache) {
6079                 rec = container_of(cache, struct extent_record, cache);
6080                 if (tmpl->refs)
6081                         rec->refs++;
6082                 if (rec->nr == 1)
6083                         rec->nr = max(tmpl->nr, tmpl->max_size);
6084
6085                 /*
6086                  * We need to make sure to reset nr to whatever the extent
6087                  * record says was the real size, this way we can compare it to
6088                  * the backrefs.
6089                  */
6090                 if (tmpl->found_rec) {
6091                         if (tmpl->start != rec->start || rec->found_rec) {
6092                                 struct extent_record *tmp;
6093
6094                                 dup = 1;
6095                                 if (list_empty(&rec->list))
6096                                         list_add_tail(&rec->list,
6097                                                       &duplicate_extents);
6098
6099                                 /*
6100                                  * We have to do this song and dance in case we
6101                                  * find an extent record that falls inside of
6102                                  * our current extent record but does not have
6103                                  * the same objectid.
6104                                  */
6105                                 tmp = malloc(sizeof(*tmp));
6106                                 if (!tmp)
6107                                         return -ENOMEM;
6108                                 tmp->start = tmpl->start;
6109                                 tmp->max_size = tmpl->max_size;
6110                                 tmp->nr = tmpl->nr;
6111                                 tmp->found_rec = 1;
6112                                 tmp->metadata = tmpl->metadata;
6113                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6114                                 INIT_LIST_HEAD(&tmp->list);
6115                                 list_add_tail(&tmp->list, &rec->dups);
6116                                 rec->num_duplicates++;
6117                         } else {
6118                                 rec->nr = tmpl->nr;
6119                                 rec->found_rec = 1;
6120                         }
6121                 }
6122
6123                 if (tmpl->extent_item_refs && !dup) {
6124                         if (rec->extent_item_refs) {
6125                                 fprintf(stderr, "block %llu rec "
6126                                         "extent_item_refs %llu, passed %llu\n",
6127                                         (unsigned long long)tmpl->start,
6128                                         (unsigned long long)
6129                                                         rec->extent_item_refs,
6130                                         (unsigned long long)tmpl->extent_item_refs);
6131                         }
6132                         rec->extent_item_refs = tmpl->extent_item_refs;
6133                 }
6134                 if (tmpl->is_root)
6135                         rec->is_root = 1;
6136                 if (tmpl->content_checked)
6137                         rec->content_checked = 1;
6138                 if (tmpl->owner_ref_checked)
6139                         rec->owner_ref_checked = 1;
6140                 memcpy(&rec->parent_key, &tmpl->parent_key,
6141                                 sizeof(tmpl->parent_key));
6142                 if (tmpl->parent_generation)
6143                         rec->parent_generation = tmpl->parent_generation;
6144                 if (rec->max_size < tmpl->max_size)
6145                         rec->max_size = tmpl->max_size;
6146
6147                 /*
6148                  * A metadata extent can't cross stripe_len boundary, otherwise
6149                  * kernel scrub won't be able to handle it.
6150                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6151                  * it.
6152                  */
6153                 if (tmpl->metadata)
6154                         rec->crossing_stripes = check_crossing_stripes(
6155                                         global_info, rec->start,
6156                                         global_info->tree_root->nodesize);
6157                 check_extent_type(rec);
6158                 maybe_free_extent_rec(extent_cache, rec);
6159                 return ret;
6160         }
6161
6162         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6163
6164         return ret;
6165 }
6166
6167 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6168                             u64 parent, u64 root, int found_ref)
6169 {
6170         struct extent_record *rec;
6171         struct tree_backref *back;
6172         struct cache_extent *cache;
6173         int ret;
6174
6175         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6176         if (!cache) {
6177                 struct extent_record tmpl;
6178
6179                 memset(&tmpl, 0, sizeof(tmpl));
6180                 tmpl.start = bytenr;
6181                 tmpl.nr = 1;
6182                 tmpl.metadata = 1;
6183
6184                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6185                 if (ret)
6186                         return ret;
6187
6188                 /* really a bug in cache_extent implement now */
6189                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6190                 if (!cache)
6191                         return -ENOENT;
6192         }
6193
6194         rec = container_of(cache, struct extent_record, cache);
6195         if (rec->start != bytenr) {
6196                 /*
6197                  * Several cause, from unaligned bytenr to over lapping extents
6198                  */
6199                 return -EEXIST;
6200         }
6201
6202         back = find_tree_backref(rec, parent, root);
6203         if (!back) {
6204                 back = alloc_tree_backref(rec, parent, root);
6205                 if (!back)
6206                         return -ENOMEM;
6207         }
6208
6209         if (found_ref) {
6210                 if (back->node.found_ref) {
6211                         fprintf(stderr, "Extent back ref already exists "
6212                                 "for %llu parent %llu root %llu \n",
6213                                 (unsigned long long)bytenr,
6214                                 (unsigned long long)parent,
6215                                 (unsigned long long)root);
6216                 }
6217                 back->node.found_ref = 1;
6218         } else {
6219                 if (back->node.found_extent_tree) {
6220                         fprintf(stderr, "Extent back ref already exists "
6221                                 "for %llu parent %llu root %llu \n",
6222                                 (unsigned long long)bytenr,
6223                                 (unsigned long long)parent,
6224                                 (unsigned long long)root);
6225                 }
6226                 back->node.found_extent_tree = 1;
6227         }
6228         check_extent_type(rec);
6229         maybe_free_extent_rec(extent_cache, rec);
6230         return 0;
6231 }
6232
6233 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6234                             u64 parent, u64 root, u64 owner, u64 offset,
6235                             u32 num_refs, int found_ref, u64 max_size)
6236 {
6237         struct extent_record *rec;
6238         struct data_backref *back;
6239         struct cache_extent *cache;
6240         int ret;
6241
6242         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6243         if (!cache) {
6244                 struct extent_record tmpl;
6245
6246                 memset(&tmpl, 0, sizeof(tmpl));
6247                 tmpl.start = bytenr;
6248                 tmpl.nr = 1;
6249                 tmpl.max_size = max_size;
6250
6251                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6252                 if (ret)
6253                         return ret;
6254
6255                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6256                 if (!cache)
6257                         abort();
6258         }
6259
6260         rec = container_of(cache, struct extent_record, cache);
6261         if (rec->max_size < max_size)
6262                 rec->max_size = max_size;
6263
6264         /*
6265          * If found_ref is set then max_size is the real size and must match the
6266          * existing refs.  So if we have already found a ref then we need to
6267          * make sure that this ref matches the existing one, otherwise we need
6268          * to add a new backref so we can notice that the backrefs don't match
6269          * and we need to figure out who is telling the truth.  This is to
6270          * account for that awful fsync bug I introduced where we'd end up with
6271          * a btrfs_file_extent_item that would have its length include multiple
6272          * prealloc extents or point inside of a prealloc extent.
6273          */
6274         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6275                                  bytenr, max_size);
6276         if (!back) {
6277                 back = alloc_data_backref(rec, parent, root, owner, offset,
6278                                           max_size);
6279                 BUG_ON(!back);
6280         }
6281
6282         if (found_ref) {
6283                 BUG_ON(num_refs != 1);
6284                 if (back->node.found_ref)
6285                         BUG_ON(back->bytes != max_size);
6286                 back->node.found_ref = 1;
6287                 back->found_ref += 1;
6288                 back->bytes = max_size;
6289                 back->disk_bytenr = bytenr;
6290                 rec->refs += 1;
6291                 rec->content_checked = 1;
6292                 rec->owner_ref_checked = 1;
6293         } else {
6294                 if (back->node.found_extent_tree) {
6295                         fprintf(stderr, "Extent back ref already exists "
6296                                 "for %llu parent %llu root %llu "
6297                                 "owner %llu offset %llu num_refs %lu\n",
6298                                 (unsigned long long)bytenr,
6299                                 (unsigned long long)parent,
6300                                 (unsigned long long)root,
6301                                 (unsigned long long)owner,
6302                                 (unsigned long long)offset,
6303                                 (unsigned long)num_refs);
6304                 }
6305                 back->num_refs = num_refs;
6306                 back->node.found_extent_tree = 1;
6307         }
6308         maybe_free_extent_rec(extent_cache, rec);
6309         return 0;
6310 }
6311
6312 static int add_pending(struct cache_tree *pending,
6313                        struct cache_tree *seen, u64 bytenr, u32 size)
6314 {
6315         int ret;
6316         ret = add_cache_extent(seen, bytenr, size);
6317         if (ret)
6318                 return ret;
6319         add_cache_extent(pending, bytenr, size);
6320         return 0;
6321 }
6322
6323 static int pick_next_pending(struct cache_tree *pending,
6324                         struct cache_tree *reada,
6325                         struct cache_tree *nodes,
6326                         u64 last, struct block_info *bits, int bits_nr,
6327                         int *reada_bits)
6328 {
6329         unsigned long node_start = last;
6330         struct cache_extent *cache;
6331         int ret;
6332
6333         cache = search_cache_extent(reada, 0);
6334         if (cache) {
6335                 bits[0].start = cache->start;
6336                 bits[0].size = cache->size;
6337                 *reada_bits = 1;
6338                 return 1;
6339         }
6340         *reada_bits = 0;
6341         if (node_start > 32768)
6342                 node_start -= 32768;
6343
6344         cache = search_cache_extent(nodes, node_start);
6345         if (!cache)
6346                 cache = search_cache_extent(nodes, 0);
6347
6348         if (!cache) {
6349                  cache = search_cache_extent(pending, 0);
6350                  if (!cache)
6351                          return 0;
6352                  ret = 0;
6353                  do {
6354                          bits[ret].start = cache->start;
6355                          bits[ret].size = cache->size;
6356                          cache = next_cache_extent(cache);
6357                          ret++;
6358                  } while (cache && ret < bits_nr);
6359                  return ret;
6360         }
6361
6362         ret = 0;
6363         do {
6364                 bits[ret].start = cache->start;
6365                 bits[ret].size = cache->size;
6366                 cache = next_cache_extent(cache);
6367                 ret++;
6368         } while (cache && ret < bits_nr);
6369
6370         if (bits_nr - ret > 8) {
6371                 u64 lookup = bits[0].start + bits[0].size;
6372                 struct cache_extent *next;
6373                 next = search_cache_extent(pending, lookup);
6374                 while(next) {
6375                         if (next->start - lookup > 32768)
6376                                 break;
6377                         bits[ret].start = next->start;
6378                         bits[ret].size = next->size;
6379                         lookup = next->start + next->size;
6380                         ret++;
6381                         if (ret == bits_nr)
6382                                 break;
6383                         next = next_cache_extent(next);
6384                         if (!next)
6385                                 break;
6386                 }
6387         }
6388         return ret;
6389 }
6390
6391 static void free_chunk_record(struct cache_extent *cache)
6392 {
6393         struct chunk_record *rec;
6394
6395         rec = container_of(cache, struct chunk_record, cache);
6396         list_del_init(&rec->list);
6397         list_del_init(&rec->dextents);
6398         free(rec);
6399 }
6400
6401 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6402 {
6403         cache_tree_free_extents(chunk_cache, free_chunk_record);
6404 }
6405
6406 static void free_device_record(struct rb_node *node)
6407 {
6408         struct device_record *rec;
6409
6410         rec = container_of(node, struct device_record, node);
6411         free(rec);
6412 }
6413
6414 FREE_RB_BASED_TREE(device_cache, free_device_record);
6415
6416 int insert_block_group_record(struct block_group_tree *tree,
6417                               struct block_group_record *bg_rec)
6418 {
6419         int ret;
6420
6421         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6422         if (ret)
6423                 return ret;
6424
6425         list_add_tail(&bg_rec->list, &tree->block_groups);
6426         return 0;
6427 }
6428
6429 static void free_block_group_record(struct cache_extent *cache)
6430 {
6431         struct block_group_record *rec;
6432
6433         rec = container_of(cache, struct block_group_record, cache);
6434         list_del_init(&rec->list);
6435         free(rec);
6436 }
6437
6438 void free_block_group_tree(struct block_group_tree *tree)
6439 {
6440         cache_tree_free_extents(&tree->tree, free_block_group_record);
6441 }
6442
6443 int insert_device_extent_record(struct device_extent_tree *tree,
6444                                 struct device_extent_record *de_rec)
6445 {
6446         int ret;
6447
6448         /*
6449          * Device extent is a bit different from the other extents, because
6450          * the extents which belong to the different devices may have the
6451          * same start and size, so we need use the special extent cache
6452          * search/insert functions.
6453          */
6454         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6455         if (ret)
6456                 return ret;
6457
6458         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6459         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6460         return 0;
6461 }
6462
6463 static void free_device_extent_record(struct cache_extent *cache)
6464 {
6465         struct device_extent_record *rec;
6466
6467         rec = container_of(cache, struct device_extent_record, cache);
6468         if (!list_empty(&rec->chunk_list))
6469                 list_del_init(&rec->chunk_list);
6470         if (!list_empty(&rec->device_list))
6471                 list_del_init(&rec->device_list);
6472         free(rec);
6473 }
6474
6475 void free_device_extent_tree(struct device_extent_tree *tree)
6476 {
6477         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6478 }
6479
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as tree
 * backrefs, everything else as data backrefs carrying the v0 ref count.  In
 * both cases key.objectid is the extent bytenr and key.offset the parent.
 *
 * Return: the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6500
6501 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6502                                             struct btrfs_key *key,
6503                                             int slot)
6504 {
6505         struct btrfs_chunk *ptr;
6506         struct chunk_record *rec;
6507         int num_stripes, i;
6508
6509         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6510         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6511
6512         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6513         if (!rec) {
6514                 fprintf(stderr, "memory allocation failed\n");
6515                 exit(-1);
6516         }
6517
6518         INIT_LIST_HEAD(&rec->list);
6519         INIT_LIST_HEAD(&rec->dextents);
6520         rec->bg_rec = NULL;
6521
6522         rec->cache.start = key->offset;
6523         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6524
6525         rec->generation = btrfs_header_generation(leaf);
6526
6527         rec->objectid = key->objectid;
6528         rec->type = key->type;
6529         rec->offset = key->offset;
6530
6531         rec->length = rec->cache.size;
6532         rec->owner = btrfs_chunk_owner(leaf, ptr);
6533         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6534         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6535         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6536         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6537         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6538         rec->num_stripes = num_stripes;
6539         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6540
6541         for (i = 0; i < rec->num_stripes; ++i) {
6542                 rec->stripes[i].devid =
6543                         btrfs_stripe_devid_nr(leaf, ptr, i);
6544                 rec->stripes[i].offset =
6545                         btrfs_stripe_offset_nr(leaf, ptr, i);
6546                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6547                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6548                                 BTRFS_UUID_SIZE);
6549         }
6550
6551         return rec;
6552 }
6553
6554 static int process_chunk_item(struct cache_tree *chunk_cache,
6555                               struct btrfs_key *key, struct extent_buffer *eb,
6556                               int slot)
6557 {
6558         struct chunk_record *rec;
6559         struct btrfs_chunk *chunk;
6560         int ret = 0;
6561
6562         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6563         /*
6564          * Do extra check for this chunk item,
6565          *
6566          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6567          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6568          * and owner<->key_type check.
6569          */
6570         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6571                                       key->offset);
6572         if (ret < 0) {
6573                 error("chunk(%llu, %llu) is not valid, ignore it",
6574                       key->offset, btrfs_chunk_length(eb, chunk));
6575                 return 0;
6576         }
6577         rec = btrfs_new_chunk_record(eb, key, slot);
6578         ret = insert_cache_extent(chunk_cache, &rec->cache);
6579         if (ret) {
6580                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6581                         rec->offset, rec->length);
6582                 free(rec);
6583         }
6584
6585         return ret;
6586 }
6587
6588 static int process_device_item(struct rb_root *dev_cache,
6589                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6590 {
6591         struct btrfs_dev_item *ptr;
6592         struct device_record *rec;
6593         int ret = 0;
6594
6595         ptr = btrfs_item_ptr(eb,
6596                 slot, struct btrfs_dev_item);
6597
6598         rec = malloc(sizeof(*rec));
6599         if (!rec) {
6600                 fprintf(stderr, "memory allocation failed\n");
6601                 return -ENOMEM;
6602         }
6603
6604         rec->devid = key->offset;
6605         rec->generation = btrfs_header_generation(eb);
6606
6607         rec->objectid = key->objectid;
6608         rec->type = key->type;
6609         rec->offset = key->offset;
6610
6611         rec->devid = btrfs_device_id(eb, ptr);
6612         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6613         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6614
6615         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6616         if (ret) {
6617                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6618                 free(rec);
6619         }
6620
6621         return ret;
6622 }
6623
6624 struct block_group_record *
6625 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6626                              int slot)
6627 {
6628         struct btrfs_block_group_item *ptr;
6629         struct block_group_record *rec;
6630
6631         rec = calloc(1, sizeof(*rec));
6632         if (!rec) {
6633                 fprintf(stderr, "memory allocation failed\n");
6634                 exit(-1);
6635         }
6636
6637         rec->cache.start = key->objectid;
6638         rec->cache.size = key->offset;
6639
6640         rec->generation = btrfs_header_generation(leaf);
6641
6642         rec->objectid = key->objectid;
6643         rec->type = key->type;
6644         rec->offset = key->offset;
6645
6646         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6647         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6648
6649         INIT_LIST_HEAD(&rec->list);
6650
6651         return rec;
6652 }
6653
6654 static int process_block_group_item(struct block_group_tree *block_group_cache,
6655                                     struct btrfs_key *key,
6656                                     struct extent_buffer *eb, int slot)
6657 {
6658         struct block_group_record *rec;
6659         int ret = 0;
6660
6661         rec = btrfs_new_block_group_record(eb, key, slot);
6662         ret = insert_block_group_record(block_group_cache, rec);
6663         if (ret) {
6664                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6665                         rec->objectid, rec->offset);
6666                 free(rec);
6667         }
6668
6669         return ret;
6670 }
6671
6672 struct device_extent_record *
6673 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6674                                struct btrfs_key *key, int slot)
6675 {
6676         struct device_extent_record *rec;
6677         struct btrfs_dev_extent *ptr;
6678
6679         rec = calloc(1, sizeof(*rec));
6680         if (!rec) {
6681                 fprintf(stderr, "memory allocation failed\n");
6682                 exit(-1);
6683         }
6684
6685         rec->cache.objectid = key->objectid;
6686         rec->cache.start = key->offset;
6687
6688         rec->generation = btrfs_header_generation(leaf);
6689
6690         rec->objectid = key->objectid;
6691         rec->type = key->type;
6692         rec->offset = key->offset;
6693
6694         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6695         rec->chunk_objecteid =
6696                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6697         rec->chunk_offset =
6698                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6699         rec->length = btrfs_dev_extent_length(leaf, ptr);
6700         rec->cache.size = rec->length;
6701
6702         INIT_LIST_HEAD(&rec->chunk_list);
6703         INIT_LIST_HEAD(&rec->device_list);
6704
6705         return rec;
6706 }
6707
6708 static int
6709 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6710                            struct btrfs_key *key, struct extent_buffer *eb,
6711                            int slot)
6712 {
6713         struct device_extent_record *rec;
6714         int ret;
6715
6716         rec = btrfs_new_device_extent_record(eb, key, slot);
6717         ret = insert_device_extent_record(dev_extent_cache, rec);
6718         if (ret) {
6719                 fprintf(stderr,
6720                         "Device extent[%llu, %llu, %llu] existed.\n",
6721                         rec->objectid, rec->offset, rec->length);
6722                 free(rec);
6723         }
6724
6725         return ret;
6726 }
6727
6728 static int process_extent_item(struct btrfs_root *root,
6729                                struct cache_tree *extent_cache,
6730                                struct extent_buffer *eb, int slot)
6731 {
6732         struct btrfs_extent_item *ei;
6733         struct btrfs_extent_inline_ref *iref;
6734         struct btrfs_extent_data_ref *dref;
6735         struct btrfs_shared_data_ref *sref;
6736         struct btrfs_key key;
6737         struct extent_record tmpl;
6738         unsigned long end;
6739         unsigned long ptr;
6740         int ret;
6741         int type;
6742         u32 item_size = btrfs_item_size_nr(eb, slot);
6743         u64 refs = 0;
6744         u64 offset;
6745         u64 num_bytes;
6746         int metadata = 0;
6747
6748         btrfs_item_key_to_cpu(eb, &key, slot);
6749
6750         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6751                 metadata = 1;
6752                 num_bytes = root->nodesize;
6753         } else {
6754                 num_bytes = key.offset;
6755         }
6756
6757         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6758                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6759                       key.objectid, root->sectorsize);
6760                 return -EIO;
6761         }
6762         if (item_size < sizeof(*ei)) {
6763 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6764                 struct btrfs_extent_item_v0 *ei0;
6765                 BUG_ON(item_size != sizeof(*ei0));
6766                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6767                 refs = btrfs_extent_refs_v0(eb, ei0);
6768 #else
6769                 BUG();
6770 #endif
6771                 memset(&tmpl, 0, sizeof(tmpl));
6772                 tmpl.start = key.objectid;
6773                 tmpl.nr = num_bytes;
6774                 tmpl.extent_item_refs = refs;
6775                 tmpl.metadata = metadata;
6776                 tmpl.found_rec = 1;
6777                 tmpl.max_size = num_bytes;
6778
6779                 return add_extent_rec(extent_cache, &tmpl);
6780         }
6781
6782         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6783         refs = btrfs_extent_refs(eb, ei);
6784         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6785                 metadata = 1;
6786         else
6787                 metadata = 0;
6788         if (metadata && num_bytes != root->nodesize) {
6789                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6790                       num_bytes, root->nodesize);
6791                 return -EIO;
6792         }
6793         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6794                 error("ignore invalid data extent, length %llu is not aligned to %u",
6795                       num_bytes, root->sectorsize);
6796                 return -EIO;
6797         }
6798
6799         memset(&tmpl, 0, sizeof(tmpl));
6800         tmpl.start = key.objectid;
6801         tmpl.nr = num_bytes;
6802         tmpl.extent_item_refs = refs;
6803         tmpl.metadata = metadata;
6804         tmpl.found_rec = 1;
6805         tmpl.max_size = num_bytes;
6806         add_extent_rec(extent_cache, &tmpl);
6807
6808         ptr = (unsigned long)(ei + 1);
6809         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6810             key.type == BTRFS_EXTENT_ITEM_KEY)
6811                 ptr += sizeof(struct btrfs_tree_block_info);
6812
6813         end = (unsigned long)ei + item_size;
6814         while (ptr < end) {
6815                 iref = (struct btrfs_extent_inline_ref *)ptr;
6816                 type = btrfs_extent_inline_ref_type(eb, iref);
6817                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6818                 switch (type) {
6819                 case BTRFS_TREE_BLOCK_REF_KEY:
6820                         ret = add_tree_backref(extent_cache, key.objectid,
6821                                         0, offset, 0);
6822                         if (ret < 0)
6823                                 error("add_tree_backref failed: %s",
6824                                       strerror(-ret));
6825                         break;
6826                 case BTRFS_SHARED_BLOCK_REF_KEY:
6827                         ret = add_tree_backref(extent_cache, key.objectid,
6828                                         offset, 0, 0);
6829                         if (ret < 0)
6830                                 error("add_tree_backref failed: %s",
6831                                       strerror(-ret));
6832                         break;
6833                 case BTRFS_EXTENT_DATA_REF_KEY:
6834                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6835                         add_data_backref(extent_cache, key.objectid, 0,
6836                                         btrfs_extent_data_ref_root(eb, dref),
6837                                         btrfs_extent_data_ref_objectid(eb,
6838                                                                        dref),
6839                                         btrfs_extent_data_ref_offset(eb, dref),
6840                                         btrfs_extent_data_ref_count(eb, dref),
6841                                         0, num_bytes);
6842                         break;
6843                 case BTRFS_SHARED_DATA_REF_KEY:
6844                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6845                         add_data_backref(extent_cache, key.objectid, offset,
6846                                         0, 0, 0,
6847                                         btrfs_shared_data_ref_count(eb, sref),
6848                                         0, num_bytes);
6849                         break;
6850                 default:
6851                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6852                                 key.objectid, key.type, num_bytes);
6853                         goto out;
6854                 }
6855                 ptr += btrfs_extent_inline_ref_size(type);
6856         }
6857         WARN_ON(ptr > end);
6858 out:
6859         return 0;
6860 }
6861
6862 static int check_cache_range(struct btrfs_root *root,
6863                              struct btrfs_block_group_cache *cache,
6864                              u64 offset, u64 bytes)
6865 {
6866         struct btrfs_free_space *entry;
6867         u64 *logical;
6868         u64 bytenr;
6869         int stripe_len;
6870         int i, nr, ret;
6871
6872         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6873                 bytenr = btrfs_sb_offset(i);
6874                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6875                                        cache->key.objectid, bytenr, 0,
6876                                        &logical, &nr, &stripe_len);
6877                 if (ret)
6878                         return ret;
6879
6880                 while (nr--) {
6881                         if (logical[nr] + stripe_len <= offset)
6882                                 continue;
6883                         if (offset + bytes <= logical[nr])
6884                                 continue;
6885                         if (logical[nr] == offset) {
6886                                 if (stripe_len >= bytes) {
6887                                         free(logical);
6888                                         return 0;
6889                                 }
6890                                 bytes -= stripe_len;
6891                                 offset += stripe_len;
6892                         } else if (logical[nr] < offset) {
6893                                 if (logical[nr] + stripe_len >=
6894                                     offset + bytes) {
6895                                         free(logical);
6896                                         return 0;
6897                                 }
6898                                 bytes = (offset + bytes) -
6899                                         (logical[nr] + stripe_len);
6900                                 offset = logical[nr] + stripe_len;
6901                         } else {
6902                                 /*
6903                                  * Could be tricky, the super may land in the
6904                                  * middle of the area we're checking.  First
6905                                  * check the easiest case, it's at the end.
6906                                  */
6907                                 if (logical[nr] + stripe_len >=
6908                                     bytes + offset) {
6909                                         bytes = logical[nr] - offset;
6910                                         continue;
6911                                 }
6912
6913                                 /* Check the left side */
6914                                 ret = check_cache_range(root, cache,
6915                                                         offset,
6916                                                         logical[nr] - offset);
6917                                 if (ret) {
6918                                         free(logical);
6919                                         return ret;
6920                                 }
6921
6922                                 /* Now we continue with the right side */
6923                                 bytes = (offset + bytes) -
6924                                         (logical[nr] + stripe_len);
6925                                 offset = logical[nr] + stripe_len;
6926                         }
6927                 }
6928
6929                 free(logical);
6930         }
6931
6932         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6933         if (!entry) {
6934                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6935                         offset, offset+bytes);
6936                 return -EINVAL;
6937         }
6938
6939         if (entry->offset != offset) {
6940                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6941                         entry->offset);
6942                 return -EINVAL;
6943         }
6944
6945         if (entry->bytes != bytes) {
6946                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6947                         bytes, entry->bytes, offset);
6948                 return -EINVAL;
6949         }
6950
6951         unlink_free_space(cache->free_space_ctl, entry);
6952         free(entry);
6953         return 0;
6954 }
6955
6956 static int verify_space_cache(struct btrfs_root *root,
6957                               struct btrfs_block_group_cache *cache)
6958 {
6959         struct btrfs_path path;
6960         struct extent_buffer *leaf;
6961         struct btrfs_key key;
6962         u64 last;
6963         int ret = 0;
6964
6965         root = root->fs_info->extent_root;
6966
6967         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6968
6969         btrfs_init_path(&path);
6970         key.objectid = last;
6971         key.offset = 0;
6972         key.type = BTRFS_EXTENT_ITEM_KEY;
6973         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6974         if (ret < 0)
6975                 goto out;
6976         ret = 0;
6977         while (1) {
6978                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6979                         ret = btrfs_next_leaf(root, &path);
6980                         if (ret < 0)
6981                                 goto out;
6982                         if (ret > 0) {
6983                                 ret = 0;
6984                                 break;
6985                         }
6986                 }
6987                 leaf = path.nodes[0];
6988                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6989                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6990                         break;
6991                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6992                     key.type != BTRFS_METADATA_ITEM_KEY) {
6993                         path.slots[0]++;
6994                         continue;
6995                 }
6996
6997                 if (last == key.objectid) {
6998                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6999                                 last = key.objectid + key.offset;
7000                         else
7001                                 last = key.objectid + root->nodesize;
7002                         path.slots[0]++;
7003                         continue;
7004                 }
7005
7006                 ret = check_cache_range(root, cache, last,
7007                                         key.objectid - last);
7008                 if (ret)
7009                         break;
7010                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7011                         last = key.objectid + key.offset;
7012                 else
7013                         last = key.objectid + root->nodesize;
7014                 path.slots[0]++;
7015         }
7016
7017         if (last < cache->key.objectid + cache->key.offset)
7018                 ret = check_cache_range(root, cache, last,
7019                                         cache->key.objectid +
7020                                         cache->key.offset - last);
7021
7022 out:
7023         btrfs_release_path(&path);
7024
7025         if (!ret &&
7026             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7027                 fprintf(stderr, "There are still entries left in the space "
7028                         "cache\n");
7029                 ret = -EINVAL;
7030         }
7031
7032         return ret;
7033 }
7034
7035 static int check_space_cache(struct btrfs_root *root)
7036 {
7037         struct btrfs_block_group_cache *cache;
7038         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7039         int ret;
7040         int error = 0;
7041
7042         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7043             btrfs_super_generation(root->fs_info->super_copy) !=
7044             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7045                 printf("cache and super generation don't match, space cache "
7046                        "will be invalidated\n");
7047                 return 0;
7048         }
7049
7050         if (ctx.progress_enabled) {
7051                 ctx.tp = TASK_FREE_SPACE;
7052                 task_start(ctx.info);
7053         }
7054
7055         while (1) {
7056                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7057                 if (!cache)
7058                         break;
7059
7060                 start = cache->key.objectid + cache->key.offset;
7061                 if (!cache->free_space_ctl) {
7062                         if (btrfs_init_free_space_ctl(cache,
7063                                                       root->sectorsize)) {
7064                                 ret = -ENOMEM;
7065                                 break;
7066                         }
7067                 } else {
7068                         btrfs_remove_free_space_cache(cache);
7069                 }
7070
7071                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7072                         ret = exclude_super_stripes(root, cache);
7073                         if (ret) {
7074                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7075                                         strerror(-ret));
7076                                 error++;
7077                                 continue;
7078                         }
7079                         ret = load_free_space_tree(root->fs_info, cache);
7080                         free_excluded_extents(root, cache);
7081                         if (ret < 0) {
7082                                 fprintf(stderr, "could not load free space tree: %s\n",
7083                                         strerror(-ret));
7084                                 error++;
7085                                 continue;
7086                         }
7087                         error += ret;
7088                 } else {
7089                         ret = load_free_space_cache(root->fs_info, cache);
7090                         if (!ret)
7091                                 continue;
7092                 }
7093
7094                 ret = verify_space_cache(root, cache);
7095                 if (ret) {
7096                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7097                                 cache->key.objectid);
7098                         error++;
7099                 }
7100         }
7101
7102         task_stop(ctx.info);
7103
7104         return error ? -EINVAL : 0;
7105 }
7106
7107 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7108                         u64 num_bytes, unsigned long leaf_offset,
7109                         struct extent_buffer *eb) {
7110
7111         u64 offset = 0;
7112         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7113         char *data;
7114         unsigned long csum_offset;
7115         u32 csum;
7116         u32 csum_expected;
7117         u64 read_len;
7118         u64 data_checked = 0;
7119         u64 tmp;
7120         int ret = 0;
7121         int mirror;
7122         int num_copies;
7123
7124         if (num_bytes % root->sectorsize)
7125                 return -EINVAL;
7126
7127         data = malloc(num_bytes);
7128         if (!data)
7129                 return -ENOMEM;
7130
7131         while (offset < num_bytes) {
7132                 mirror = 0;
7133 again:
7134                 read_len = num_bytes - offset;
7135                 /* read as much space once a time */
7136                 ret = read_extent_data(root, data + offset,
7137                                 bytenr + offset, &read_len, mirror);
7138                 if (ret)
7139                         goto out;
7140                 data_checked = 0;
7141                 /* verify every 4k data's checksum */
7142                 while (data_checked < read_len) {
7143                         csum = ~(u32)0;
7144                         tmp = offset + data_checked;
7145
7146                         csum = btrfs_csum_data((char *)data + tmp,
7147                                                csum, root->sectorsize);
7148                         btrfs_csum_final(csum, (u8 *)&csum);
7149
7150                         csum_offset = leaf_offset +
7151                                  tmp / root->sectorsize * csum_size;
7152                         read_extent_buffer(eb, (char *)&csum_expected,
7153                                            csum_offset, csum_size);
7154                         /* try another mirror */
7155                         if (csum != csum_expected) {
7156                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7157                                                 mirror, bytenr + tmp,
7158                                                 csum, csum_expected);
7159                                 num_copies = btrfs_num_copies(
7160                                                 &root->fs_info->mapping_tree,
7161                                                 bytenr, num_bytes);
7162                                 if (mirror < num_copies - 1) {
7163                                         mirror += 1;
7164                                         goto again;
7165                                 }
7166                         }
7167                         data_checked += root->sectorsize;
7168                 }
7169                 offset += read_len;
7170         }
7171 out:
7172         free(data);
7173         return ret;
7174 }
7175
7176 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7177                                u64 num_bytes)
7178 {
7179         struct btrfs_path path;
7180         struct extent_buffer *leaf;
7181         struct btrfs_key key;
7182         int ret;
7183
7184         btrfs_init_path(&path);
7185         key.objectid = bytenr;
7186         key.type = BTRFS_EXTENT_ITEM_KEY;
7187         key.offset = (u64)-1;
7188
7189 again:
7190         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7191                                 0, 0);
7192         if (ret < 0) {
7193                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7194                 btrfs_release_path(&path);
7195                 return ret;
7196         } else if (ret) {
7197                 if (path.slots[0] > 0) {
7198                         path.slots[0]--;
7199                 } else {
7200                         ret = btrfs_prev_leaf(root, &path);
7201                         if (ret < 0) {
7202                                 goto out;
7203                         } else if (ret > 0) {
7204                                 ret = 0;
7205                                 goto out;
7206                         }
7207                 }
7208         }
7209
7210         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7211
7212         /*
7213          * Block group items come before extent items if they have the same
7214          * bytenr, so walk back one more just in case.  Dear future traveller,
7215          * first congrats on mastering time travel.  Now if it's not too much
7216          * trouble could you go back to 2006 and tell Chris to make the
7217          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7218          * EXTENT_ITEM_KEY please?
7219          */
7220         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7221                 if (path.slots[0] > 0) {
7222                         path.slots[0]--;
7223                 } else {
7224                         ret = btrfs_prev_leaf(root, &path);
7225                         if (ret < 0) {
7226                                 goto out;
7227                         } else if (ret > 0) {
7228                                 ret = 0;
7229                                 goto out;
7230                         }
7231                 }
7232                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7233         }
7234
7235         while (num_bytes) {
7236                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7237                         ret = btrfs_next_leaf(root, &path);
7238                         if (ret < 0) {
7239                                 fprintf(stderr, "Error going to next leaf "
7240                                         "%d\n", ret);
7241                                 btrfs_release_path(&path);
7242                                 return ret;
7243                         } else if (ret) {
7244                                 break;
7245                         }
7246                 }
7247                 leaf = path.nodes[0];
7248                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7249                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7250                         path.slots[0]++;
7251                         continue;
7252                 }
7253                 if (key.objectid + key.offset < bytenr) {
7254                         path.slots[0]++;
7255                         continue;
7256                 }
7257                 if (key.objectid > bytenr + num_bytes)
7258                         break;
7259
7260                 if (key.objectid == bytenr) {
7261                         if (key.offset >= num_bytes) {
7262                                 num_bytes = 0;
7263                                 break;
7264                         }
7265                         num_bytes -= key.offset;
7266                         bytenr += key.offset;
7267                 } else if (key.objectid < bytenr) {
7268                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7269                                 num_bytes = 0;
7270                                 break;
7271                         }
7272                         num_bytes = (bytenr + num_bytes) -
7273                                 (key.objectid + key.offset);
7274                         bytenr = key.objectid + key.offset;
7275                 } else {
7276                         if (key.objectid + key.offset < bytenr + num_bytes) {
7277                                 u64 new_start = key.objectid + key.offset;
7278                                 u64 new_bytes = bytenr + num_bytes - new_start;
7279
7280                                 /*
7281                                  * Weird case, the extent is in the middle of
7282                                  * our range, we'll have to search one side
7283                                  * and then the other.  Not sure if this happens
7284                                  * in real life, but no harm in coding it up
7285                                  * anyway just in case.
7286                                  */
7287                                 btrfs_release_path(&path);
7288                                 ret = check_extent_exists(root, new_start,
7289                                                           new_bytes);
7290                                 if (ret) {
7291                                         fprintf(stderr, "Right section didn't "
7292                                                 "have a record\n");
7293                                         break;
7294                                 }
7295                                 num_bytes = key.objectid - bytenr;
7296                                 goto again;
7297                         }
7298                         num_bytes = key.objectid - bytenr;
7299                 }
7300                 path.slots[0]++;
7301         }
7302         ret = 0;
7303
7304 out:
7305         if (num_bytes && !ret) {
7306                 fprintf(stderr, "There are no extents for csum range "
7307                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7308                 ret = 1;
7309         }
7310
7311         btrfs_release_path(&path);
7312         return ret;
7313 }
7314
7315 static int check_csums(struct btrfs_root *root)
7316 {
7317         struct btrfs_path path;
7318         struct extent_buffer *leaf;
7319         struct btrfs_key key;
7320         u64 offset = 0, num_bytes = 0;
7321         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7322         int errors = 0;
7323         int ret;
7324         u64 data_len;
7325         unsigned long leaf_offset;
7326
7327         root = root->fs_info->csum_root;
7328         if (!extent_buffer_uptodate(root->node)) {
7329                 fprintf(stderr, "No valid csum tree found\n");
7330                 return -ENOENT;
7331         }
7332
7333         btrfs_init_path(&path);
7334         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7335         key.type = BTRFS_EXTENT_CSUM_KEY;
7336         key.offset = 0;
7337         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7338         if (ret < 0) {
7339                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7340                 btrfs_release_path(&path);
7341                 return ret;
7342         }
7343
7344         if (ret > 0 && path.slots[0])
7345                 path.slots[0]--;
7346         ret = 0;
7347
7348         while (1) {
7349                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7350                         ret = btrfs_next_leaf(root, &path);
7351                         if (ret < 0) {
7352                                 fprintf(stderr, "Error going to next leaf "
7353                                         "%d\n", ret);
7354                                 break;
7355                         }
7356                         if (ret)
7357                                 break;
7358                 }
7359                 leaf = path.nodes[0];
7360
7361                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7362                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7363                         path.slots[0]++;
7364                         continue;
7365                 }
7366
7367                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7368                               csum_size) * root->sectorsize;
7369                 if (!check_data_csum)
7370                         goto skip_csum_check;
7371                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7372                 ret = check_extent_csums(root, key.offset, data_len,
7373                                          leaf_offset, leaf);
7374                 if (ret)
7375                         break;
7376 skip_csum_check:
7377                 if (!num_bytes) {
7378                         offset = key.offset;
7379                 } else if (key.offset != offset + num_bytes) {
7380                         ret = check_extent_exists(root, offset, num_bytes);
7381                         if (ret) {
7382                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7383                                         "there is no extent record\n",
7384                                         offset, offset+num_bytes);
7385                                 errors++;
7386                         }
7387                         offset = key.offset;
7388                         num_bytes = 0;
7389                 }
7390                 num_bytes += data_len;
7391                 path.slots[0]++;
7392         }
7393
7394         btrfs_release_path(&path);
7395         return errors;
7396 }
7397
7398 static int is_dropped_key(struct btrfs_key *key,
7399                           struct btrfs_key *drop_key) {
7400         if (key->objectid < drop_key->objectid)
7401                 return 1;
7402         else if (key->objectid == drop_key->objectid) {
7403                 if (key->type < drop_key->type)
7404                         return 1;
7405                 else if (key->type == drop_key->type) {
7406                         if (key->offset < drop_key->offset)
7407                                 return 1;
7408                 }
7409         }
7410         return 0;
7411 }
7412
7413 /*
7414  * Here are the rules for FULL_BACKREF.
7415  *
7416  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7417  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7418  *      FULL_BACKREF set.
7419  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7420  *    if it happened after the relocation occurred since we'll have dropped the
7421  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7422  *    have no real way to know for sure.
7423  *
7424  * We process the blocks one root at a time, and we start from the lowest root
7425  * objectid and go to the highest.  So we can just lookup the owner backref for
7426  * the record and if we don't find it then we know it doesn't exist and we have
7427  * a FULL BACKREF.
7428  *
7429  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7430  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7431  * be set or not and then we can check later once we've gathered all the refs.
7432  */
7433 static int calc_extent_flag(struct cache_tree *extent_cache,
7434                            struct extent_buffer *buf,
7435                            struct root_item_record *ri,
7436                            u64 *flags)
7437 {
7438         struct extent_record *rec;
7439         struct cache_extent *cache;
7440         struct tree_backref *tback;
7441         u64 owner = 0;
7442
7443         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7444         /* we have added this extent before */
7445         if (!cache)
7446                 return -ENOENT;
7447
7448         rec = container_of(cache, struct extent_record, cache);
7449
7450         /*
7451          * Except file/reloc tree, we can not have
7452          * FULL BACKREF MODE
7453          */
7454         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7455                 goto normal;
7456         /*
7457          * root node
7458          */
7459         if (buf->start == ri->bytenr)
7460                 goto normal;
7461
7462         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7463                 goto full_backref;
7464
7465         owner = btrfs_header_owner(buf);
7466         if (owner == ri->objectid)
7467                 goto normal;
7468
7469         tback = find_tree_backref(rec, 0, owner);
7470         if (!tback)
7471                 goto full_backref;
7472 normal:
7473         *flags = 0;
7474         if (rec->flag_block_full_backref != FLAG_UNSET &&
7475             rec->flag_block_full_backref != 0)
7476                 rec->bad_full_backref = 1;
7477         return 0;
7478 full_backref:
7479         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7480         if (rec->flag_block_full_backref != FLAG_UNSET &&
7481             rec->flag_block_full_backref != 1)
7482                 rec->bad_full_backref = 1;
7483         return 0;
7484 }
7485
7486 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7487 {
7488         fprintf(stderr, "Invalid key type(");
7489         print_key_type(stderr, 0, key_type);
7490         fprintf(stderr, ") found in root(");
7491         print_objectid(stderr, rootid, 0);
7492         fprintf(stderr, ")\n");
7493 }
7494
7495 /*
7496  * Check if the key is valid with its extent buffer.
7497  *
7498  * This is a early check in case invalid key exists in a extent buffer
7499  * This is not comprehensive yet, but should prevent wrong key/item passed
7500  * further
7501  */
7502 static int check_type_with_root(u64 rootid, u8 key_type)
7503 {
7504         switch (key_type) {
7505         /* Only valid in chunk tree */
7506         case BTRFS_DEV_ITEM_KEY:
7507         case BTRFS_CHUNK_ITEM_KEY:
7508                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7509                         goto err;
7510                 break;
7511         /* valid in csum and log tree */
7512         case BTRFS_CSUM_TREE_OBJECTID:
7513                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7514                       is_fstree(rootid)))
7515                         goto err;
7516                 break;
7517         case BTRFS_EXTENT_ITEM_KEY:
7518         case BTRFS_METADATA_ITEM_KEY:
7519         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7520                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7521                         goto err;
7522                 break;
7523         case BTRFS_ROOT_ITEM_KEY:
7524                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7525                         goto err;
7526                 break;
7527         case BTRFS_DEV_EXTENT_KEY:
7528                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7529                         goto err;
7530                 break;
7531         }
7532         return 0;
7533 err:
7534         report_mismatch_key_root(key_type, rootid);
7535         return -EINVAL;
7536 }
7537
7538 static int run_next_block(struct btrfs_root *root,
7539                           struct block_info *bits,
7540                           int bits_nr,
7541                           u64 *last,
7542                           struct cache_tree *pending,
7543                           struct cache_tree *seen,
7544                           struct cache_tree *reada,
7545                           struct cache_tree *nodes,
7546                           struct cache_tree *extent_cache,
7547                           struct cache_tree *chunk_cache,
7548                           struct rb_root *dev_cache,
7549                           struct block_group_tree *block_group_cache,
7550                           struct device_extent_tree *dev_extent_cache,
7551                           struct root_item_record *ri)
7552 {
7553         struct extent_buffer *buf;
7554         struct extent_record *rec = NULL;
7555         u64 bytenr;
7556         u32 size;
7557         u64 parent;
7558         u64 owner;
7559         u64 flags;
7560         u64 ptr;
7561         u64 gen = 0;
7562         int ret = 0;
7563         int i;
7564         int nritems;
7565         struct btrfs_key key;
7566         struct cache_extent *cache;
7567         int reada_bits;
7568
7569         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7570                                     bits_nr, &reada_bits);
7571         if (nritems == 0)
7572                 return 1;
7573
7574         if (!reada_bits) {
7575                 for(i = 0; i < nritems; i++) {
7576                         ret = add_cache_extent(reada, bits[i].start,
7577                                                bits[i].size);
7578                         if (ret == -EEXIST)
7579                                 continue;
7580
7581                         /* fixme, get the parent transid */
7582                         readahead_tree_block(root, bits[i].start,
7583                                              bits[i].size, 0);
7584                 }
7585         }
7586         *last = bits[0].start;
7587         bytenr = bits[0].start;
7588         size = bits[0].size;
7589
7590         cache = lookup_cache_extent(pending, bytenr, size);
7591         if (cache) {
7592                 remove_cache_extent(pending, cache);
7593                 free(cache);
7594         }
7595         cache = lookup_cache_extent(reada, bytenr, size);
7596         if (cache) {
7597                 remove_cache_extent(reada, cache);
7598                 free(cache);
7599         }
7600         cache = lookup_cache_extent(nodes, bytenr, size);
7601         if (cache) {
7602                 remove_cache_extent(nodes, cache);
7603                 free(cache);
7604         }
7605         cache = lookup_cache_extent(extent_cache, bytenr, size);
7606         if (cache) {
7607                 rec = container_of(cache, struct extent_record, cache);
7608                 gen = rec->parent_generation;
7609         }
7610
7611         /* fixme, get the real parent transid */
7612         buf = read_tree_block(root, bytenr, size, gen);
7613         if (!extent_buffer_uptodate(buf)) {
7614                 record_bad_block_io(root->fs_info,
7615                                     extent_cache, bytenr, size);
7616                 goto out;
7617         }
7618
7619         nritems = btrfs_header_nritems(buf);
7620
7621         flags = 0;
7622         if (!init_extent_tree) {
7623                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7624                                        btrfs_header_level(buf), 1, NULL,
7625                                        &flags);
7626                 if (ret < 0) {
7627                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7628                         if (ret < 0) {
7629                                 fprintf(stderr, "Couldn't calc extent flags\n");
7630                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7631                         }
7632                 }
7633         } else {
7634                 flags = 0;
7635                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7636                 if (ret < 0) {
7637                         fprintf(stderr, "Couldn't calc extent flags\n");
7638                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7639                 }
7640         }
7641
7642         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7643                 if (ri != NULL &&
7644                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7645                     ri->objectid == btrfs_header_owner(buf)) {
7646                         /*
7647                          * Ok we got to this block from it's original owner and
7648                          * we have FULL_BACKREF set.  Relocation can leave
7649                          * converted blocks over so this is altogether possible,
7650                          * however it's not possible if the generation > the
7651                          * last snapshot, so check for this case.
7652                          */
7653                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7654                             btrfs_header_generation(buf) > ri->last_snapshot) {
7655                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7656                                 rec->bad_full_backref = 1;
7657                         }
7658                 }
7659         } else {
7660                 if (ri != NULL &&
7661                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7662                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7663                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7664                         rec->bad_full_backref = 1;
7665                 }
7666         }
7667
7668         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7669                 rec->flag_block_full_backref = 1;
7670                 parent = bytenr;
7671                 owner = 0;
7672         } else {
7673                 rec->flag_block_full_backref = 0;
7674                 parent = 0;
7675                 owner = btrfs_header_owner(buf);
7676         }
7677
7678         ret = check_block(root, extent_cache, buf, flags);
7679         if (ret)
7680                 goto out;
7681
7682         if (btrfs_is_leaf(buf)) {
7683                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7684                 for (i = 0; i < nritems; i++) {
7685                         struct btrfs_file_extent_item *fi;
7686                         btrfs_item_key_to_cpu(buf, &key, i);
7687                         /*
7688                          * Check key type against the leaf owner.
7689                          * Could filter quite a lot of early error if
7690                          * owner is correct
7691                          */
7692                         if (check_type_with_root(btrfs_header_owner(buf),
7693                                                  key.type)) {
7694                                 fprintf(stderr, "ignoring invalid key\n");
7695                                 continue;
7696                         }
7697                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7698                                 process_extent_item(root, extent_cache, buf,
7699                                                     i);
7700                                 continue;
7701                         }
7702                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7703                                 process_extent_item(root, extent_cache, buf,
7704                                                     i);
7705                                 continue;
7706                         }
7707                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7708                                 total_csum_bytes +=
7709                                         btrfs_item_size_nr(buf, i);
7710                                 continue;
7711                         }
7712                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7713                                 process_chunk_item(chunk_cache, &key, buf, i);
7714                                 continue;
7715                         }
7716                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7717                                 process_device_item(dev_cache, &key, buf, i);
7718                                 continue;
7719                         }
7720                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7721                                 process_block_group_item(block_group_cache,
7722                                         &key, buf, i);
7723                                 continue;
7724                         }
7725                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7726                                 process_device_extent_item(dev_extent_cache,
7727                                         &key, buf, i);
7728                                 continue;
7729
7730                         }
7731                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7732 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7733                                 process_extent_ref_v0(extent_cache, buf, i);
7734 #else
7735                                 BUG();
7736 #endif
7737                                 continue;
7738                         }
7739
7740                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7741                                 ret = add_tree_backref(extent_cache,
7742                                                 key.objectid, 0, key.offset, 0);
7743                                 if (ret < 0)
7744                                         error("add_tree_backref failed: %s",
7745                                               strerror(-ret));
7746                                 continue;
7747                         }
7748                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7749                                 ret = add_tree_backref(extent_cache,
7750                                                 key.objectid, key.offset, 0, 0);
7751                                 if (ret < 0)
7752                                         error("add_tree_backref failed: %s",
7753                                               strerror(-ret));
7754                                 continue;
7755                         }
7756                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7757                                 struct btrfs_extent_data_ref *ref;
7758                                 ref = btrfs_item_ptr(buf, i,
7759                                                 struct btrfs_extent_data_ref);
7760                                 add_data_backref(extent_cache,
7761                                         key.objectid, 0,
7762                                         btrfs_extent_data_ref_root(buf, ref),
7763                                         btrfs_extent_data_ref_objectid(buf,
7764                                                                        ref),
7765                                         btrfs_extent_data_ref_offset(buf, ref),
7766                                         btrfs_extent_data_ref_count(buf, ref),
7767                                         0, root->sectorsize);
7768                                 continue;
7769                         }
7770                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7771                                 struct btrfs_shared_data_ref *ref;
7772                                 ref = btrfs_item_ptr(buf, i,
7773                                                 struct btrfs_shared_data_ref);
7774                                 add_data_backref(extent_cache,
7775                                         key.objectid, key.offset, 0, 0, 0,
7776                                         btrfs_shared_data_ref_count(buf, ref),
7777                                         0, root->sectorsize);
7778                                 continue;
7779                         }
7780                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7781                                 struct bad_item *bad;
7782
7783                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7784                                         continue;
7785                                 if (!owner)
7786                                         continue;
7787                                 bad = malloc(sizeof(struct bad_item));
7788                                 if (!bad)
7789                                         continue;
7790                                 INIT_LIST_HEAD(&bad->list);
7791                                 memcpy(&bad->key, &key,
7792                                        sizeof(struct btrfs_key));
7793                                 bad->root_id = owner;
7794                                 list_add_tail(&bad->list, &delete_items);
7795                                 continue;
7796                         }
7797                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7798                                 continue;
7799                         fi = btrfs_item_ptr(buf, i,
7800                                             struct btrfs_file_extent_item);
7801                         if (btrfs_file_extent_type(buf, fi) ==
7802                             BTRFS_FILE_EXTENT_INLINE)
7803                                 continue;
7804                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7805                                 continue;
7806
7807                         data_bytes_allocated +=
7808                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7809                         if (data_bytes_allocated < root->sectorsize) {
7810                                 abort();
7811                         }
7812                         data_bytes_referenced +=
7813                                 btrfs_file_extent_num_bytes(buf, fi);
7814                         add_data_backref(extent_cache,
7815                                 btrfs_file_extent_disk_bytenr(buf, fi),
7816                                 parent, owner, key.objectid, key.offset -
7817                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7818                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7819                 }
7820         } else {
7821                 int level;
7822                 struct btrfs_key first_key;
7823
7824                 first_key.objectid = 0;
7825
7826                 if (nritems > 0)
7827                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7828                 level = btrfs_header_level(buf);
7829                 for (i = 0; i < nritems; i++) {
7830                         struct extent_record tmpl;
7831
7832                         ptr = btrfs_node_blockptr(buf, i);
7833                         size = root->nodesize;
7834                         btrfs_node_key_to_cpu(buf, &key, i);
7835                         if (ri != NULL) {
7836                                 if ((level == ri->drop_level)
7837                                     && is_dropped_key(&key, &ri->drop_key)) {
7838                                         continue;
7839                                 }
7840                         }
7841
7842                         memset(&tmpl, 0, sizeof(tmpl));
7843                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7844                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7845                         tmpl.start = ptr;
7846                         tmpl.nr = size;
7847                         tmpl.refs = 1;
7848                         tmpl.metadata = 1;
7849                         tmpl.max_size = size;
7850                         ret = add_extent_rec(extent_cache, &tmpl);
7851                         if (ret < 0)
7852                                 goto out;
7853
7854                         ret = add_tree_backref(extent_cache, ptr, parent,
7855                                         owner, 1);
7856                         if (ret < 0) {
7857                                 error("add_tree_backref failed: %s",
7858                                       strerror(-ret));
7859                                 continue;
7860                         }
7861
7862                         if (level > 1) {
7863                                 add_pending(nodes, seen, ptr, size);
7864                         } else {
7865                                 add_pending(pending, seen, ptr, size);
7866                         }
7867                 }
7868                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7869                                       nritems) * sizeof(struct btrfs_key_ptr);
7870         }
7871         total_btree_bytes += buf->len;
7872         if (fs_root_objectid(btrfs_header_owner(buf)))
7873                 total_fs_tree_bytes += buf->len;
7874         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7875                 total_extent_tree_bytes += buf->len;
7876         if (!found_old_backref &&
7877             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7878             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7879             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7880                 found_old_backref = 1;
7881 out:
7882         free_extent_buffer(buf);
7883         return ret;
7884 }
7885
7886 static int add_root_to_pending(struct extent_buffer *buf,
7887                                struct cache_tree *extent_cache,
7888                                struct cache_tree *pending,
7889                                struct cache_tree *seen,
7890                                struct cache_tree *nodes,
7891                                u64 objectid)
7892 {
7893         struct extent_record tmpl;
7894         int ret;
7895
7896         if (btrfs_header_level(buf) > 0)
7897                 add_pending(nodes, seen, buf->start, buf->len);
7898         else
7899                 add_pending(pending, seen, buf->start, buf->len);
7900
7901         memset(&tmpl, 0, sizeof(tmpl));
7902         tmpl.start = buf->start;
7903         tmpl.nr = buf->len;
7904         tmpl.is_root = 1;
7905         tmpl.refs = 1;
7906         tmpl.metadata = 1;
7907         tmpl.max_size = buf->len;
7908         add_extent_rec(extent_cache, &tmpl);
7909
7910         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7911             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7912                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7913                                 0, 1);
7914         else
7915                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7916                                 1);
7917         return ret;
7918 }
7919
7920 /* as we fix the tree, we might be deleting blocks that
7921  * we're tracking for repair.  This hook makes sure we
7922  * remove any backrefs for blocks as we are fixing them.
7923  */
7924 static int free_extent_hook(struct btrfs_trans_handle *trans,
7925                             struct btrfs_root *root,
7926                             u64 bytenr, u64 num_bytes, u64 parent,
7927                             u64 root_objectid, u64 owner, u64 offset,
7928                             int refs_to_drop)
7929 {
7930         struct extent_record *rec;
7931         struct cache_extent *cache;
7932         int is_data;
7933         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7934
7935         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7936         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7937         if (!cache)
7938                 return 0;
7939
7940         rec = container_of(cache, struct extent_record, cache);
7941         if (is_data) {
7942                 struct data_backref *back;
7943                 back = find_data_backref(rec, parent, root_objectid, owner,
7944                                          offset, 1, bytenr, num_bytes);
7945                 if (!back)
7946                         goto out;
7947                 if (back->node.found_ref) {
7948                         back->found_ref -= refs_to_drop;
7949                         if (rec->refs)
7950                                 rec->refs -= refs_to_drop;
7951                 }
7952                 if (back->node.found_extent_tree) {
7953                         back->num_refs -= refs_to_drop;
7954                         if (rec->extent_item_refs)
7955                                 rec->extent_item_refs -= refs_to_drop;
7956                 }
7957                 if (back->found_ref == 0)
7958                         back->node.found_ref = 0;
7959                 if (back->num_refs == 0)
7960                         back->node.found_extent_tree = 0;
7961
7962                 if (!back->node.found_extent_tree && back->node.found_ref) {
7963                         list_del(&back->node.list);
7964                         free(back);
7965                 }
7966         } else {
7967                 struct tree_backref *back;
7968                 back = find_tree_backref(rec, parent, root_objectid);
7969                 if (!back)
7970                         goto out;
7971                 if (back->node.found_ref) {
7972                         if (rec->refs)
7973                                 rec->refs--;
7974                         back->node.found_ref = 0;
7975                 }
7976                 if (back->node.found_extent_tree) {
7977                         if (rec->extent_item_refs)
7978                                 rec->extent_item_refs--;
7979                         back->node.found_extent_tree = 0;
7980                 }
7981                 if (!back->node.found_extent_tree && back->node.found_ref) {
7982                         list_del(&back->node.list);
7983                         free(back);
7984                 }
7985         }
7986         maybe_free_extent_rec(extent_cache, rec);
7987 out:
7988         return 0;
7989 }
7990
7991 static int delete_extent_records(struct btrfs_trans_handle *trans,
7992                                  struct btrfs_root *root,
7993                                  struct btrfs_path *path,
7994                                  u64 bytenr)
7995 {
7996         struct btrfs_key key;
7997         struct btrfs_key found_key;
7998         struct extent_buffer *leaf;
7999         int ret;
8000         int slot;
8001
8002
8003         key.objectid = bytenr;
8004         key.type = (u8)-1;
8005         key.offset = (u64)-1;
8006
8007         while(1) {
8008                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8009                                         &key, path, 0, 1);
8010                 if (ret < 0)
8011                         break;
8012
8013                 if (ret > 0) {
8014                         ret = 0;
8015                         if (path->slots[0] == 0)
8016                                 break;
8017                         path->slots[0]--;
8018                 }
8019                 ret = 0;
8020
8021                 leaf = path->nodes[0];
8022                 slot = path->slots[0];
8023
8024                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8025                 if (found_key.objectid != bytenr)
8026                         break;
8027
8028                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8029                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8030                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8031                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8032                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8033                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8034                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8035                         btrfs_release_path(path);
8036                         if (found_key.type == 0) {
8037                                 if (found_key.offset == 0)
8038                                         break;
8039                                 key.offset = found_key.offset - 1;
8040                                 key.type = found_key.type;
8041                         }
8042                         key.type = found_key.type - 1;
8043                         key.offset = (u64)-1;
8044                         continue;
8045                 }
8046
8047                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8048                         found_key.objectid, found_key.type, found_key.offset);
8049
8050                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8051                 if (ret)
8052                         break;
8053                 btrfs_release_path(path);
8054
8055                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8056                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8057                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8058                                 found_key.offset : root->nodesize;
8059
8060                         ret = btrfs_update_block_group(trans, root, bytenr,
8061                                                        bytes, 0, 0);
8062                         if (ret)
8063                                 break;
8064                 }
8065         }
8066
8067         btrfs_release_path(path);
8068         return ret;
8069 }
8070
8071 /*
8072  * for a single backref, this will allocate a new extent
8073  * and add the backref to it.
8074  */
8075 static int record_extent(struct btrfs_trans_handle *trans,
8076                          struct btrfs_fs_info *info,
8077                          struct btrfs_path *path,
8078                          struct extent_record *rec,
8079                          struct extent_backref *back,
8080                          int allocated, u64 flags)
8081 {
8082         int ret = 0;
8083         struct btrfs_root *extent_root = info->extent_root;
8084         struct extent_buffer *leaf;
8085         struct btrfs_key ins_key;
8086         struct btrfs_extent_item *ei;
8087         struct data_backref *dback;
8088         struct btrfs_tree_block_info *bi;
8089
8090         if (!back->is_data)
8091                 rec->max_size = max_t(u64, rec->max_size,
8092                                     info->extent_root->nodesize);
8093
8094         if (!allocated) {
8095                 u32 item_size = sizeof(*ei);
8096
8097                 if (!back->is_data)
8098                         item_size += sizeof(*bi);
8099
8100                 ins_key.objectid = rec->start;
8101                 ins_key.offset = rec->max_size;
8102                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8103
8104                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8105                                         &ins_key, item_size);
8106                 if (ret)
8107                         goto fail;
8108
8109                 leaf = path->nodes[0];
8110                 ei = btrfs_item_ptr(leaf, path->slots[0],
8111                                     struct btrfs_extent_item);
8112
8113                 btrfs_set_extent_refs(leaf, ei, 0);
8114                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8115
8116                 if (back->is_data) {
8117                         btrfs_set_extent_flags(leaf, ei,
8118                                                BTRFS_EXTENT_FLAG_DATA);
8119                 } else {
8120                         struct btrfs_disk_key copy_key;;
8121
8122                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8123                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8124                                              sizeof(*bi));
8125
8126                         btrfs_set_disk_key_objectid(&copy_key,
8127                                                     rec->info_objectid);
8128                         btrfs_set_disk_key_type(&copy_key, 0);
8129                         btrfs_set_disk_key_offset(&copy_key, 0);
8130
8131                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8132                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8133
8134                         btrfs_set_extent_flags(leaf, ei,
8135                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8136                 }
8137
8138                 btrfs_mark_buffer_dirty(leaf);
8139                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8140                                                rec->max_size, 1, 0);
8141                 if (ret)
8142                         goto fail;
8143                 btrfs_release_path(path);
8144         }
8145
8146         if (back->is_data) {
8147                 u64 parent;
8148                 int i;
8149
8150                 dback = to_data_backref(back);
8151                 if (back->full_backref)
8152                         parent = dback->parent;
8153                 else
8154                         parent = 0;
8155
8156                 for (i = 0; i < dback->found_ref; i++) {
8157                         /* if parent != 0, we're doing a full backref
8158                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8159                          * just makes the backref allocator create a data
8160                          * backref
8161                          */
8162                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8163                                                    rec->start, rec->max_size,
8164                                                    parent,
8165                                                    dback->root,
8166                                                    parent ?
8167                                                    BTRFS_FIRST_FREE_OBJECTID :
8168                                                    dback->owner,
8169                                                    dback->offset);
8170                         if (ret)
8171                                 break;
8172                 }
8173                 fprintf(stderr, "adding new data backref"
8174                                 " on %llu %s %llu owner %llu"
8175                                 " offset %llu found %d\n",
8176                                 (unsigned long long)rec->start,
8177                                 back->full_backref ?
8178                                 "parent" : "root",
8179                                 back->full_backref ?
8180                                 (unsigned long long)parent :
8181                                 (unsigned long long)dback->root,
8182                                 (unsigned long long)dback->owner,
8183                                 (unsigned long long)dback->offset,
8184                                 dback->found_ref);
8185         } else {
8186                 u64 parent;
8187                 struct tree_backref *tback;
8188
8189                 tback = to_tree_backref(back);
8190                 if (back->full_backref)
8191                         parent = tback->parent;
8192                 else
8193                         parent = 0;
8194
8195                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8196                                            rec->start, rec->max_size,
8197                                            parent, tback->root, 0, 0);
8198                 fprintf(stderr, "adding new tree backref on "
8199                         "start %llu len %llu parent %llu root %llu\n",
8200                         rec->start, rec->max_size, parent, tback->root);
8201         }
8202 fail:
8203         btrfs_release_path(path);
8204         return ret;
8205 }
8206
8207 static struct extent_entry *find_entry(struct list_head *entries,
8208                                        u64 bytenr, u64 bytes)
8209 {
8210         struct extent_entry *entry = NULL;
8211
8212         list_for_each_entry(entry, entries, list) {
8213                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8214                         return entry;
8215         }
8216
8217         return NULL;
8218 }
8219
8220 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8221 {
8222         struct extent_entry *entry, *best = NULL, *prev = NULL;
8223
8224         list_for_each_entry(entry, entries, list) {
8225                 /*
8226                  * If there are as many broken entries as entries then we know
8227                  * not to trust this particular entry.
8228                  */
8229                 if (entry->broken == entry->count)
8230                         continue;
8231
8232                 /*
8233                  * Special case, when there are only two entries and 'best' is
8234                  * the first one
8235                  */
8236                 if (!prev) {
8237                         best = entry;
8238                         prev = entry;
8239                         continue;
8240                 }
8241
8242                 /*
8243                  * If our current entry == best then we can't be sure our best
8244                  * is really the best, so we need to keep searching.
8245                  */
8246                 if (best && best->count == entry->count) {
8247                         prev = entry;
8248                         best = NULL;
8249                         continue;
8250                 }
8251
8252                 /* Prev == entry, not good enough, have to keep searching */
8253                 if (!prev->broken && prev->count == entry->count)
8254                         continue;
8255
8256                 if (!best)
8257                         best = (prev->count > entry->count) ? prev : entry;
8258                 else if (best->count < entry->count)
8259                         best = entry;
8260                 prev = entry;
8261         }
8262
8263         return best;
8264 }
8265
8266 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8267                       struct data_backref *dback, struct extent_entry *entry)
8268 {
8269         struct btrfs_trans_handle *trans;
8270         struct btrfs_root *root;
8271         struct btrfs_file_extent_item *fi;
8272         struct extent_buffer *leaf;
8273         struct btrfs_key key;
8274         u64 bytenr, bytes;
8275         int ret, err;
8276
8277         key.objectid = dback->root;
8278         key.type = BTRFS_ROOT_ITEM_KEY;
8279         key.offset = (u64)-1;
8280         root = btrfs_read_fs_root(info, &key);
8281         if (IS_ERR(root)) {
8282                 fprintf(stderr, "Couldn't find root for our ref\n");
8283                 return -EINVAL;
8284         }
8285
8286         /*
8287          * The backref points to the original offset of the extent if it was
8288          * split, so we need to search down to the offset we have and then walk
8289          * forward until we find the backref we're looking for.
8290          */
8291         key.objectid = dback->owner;
8292         key.type = BTRFS_EXTENT_DATA_KEY;
8293         key.offset = dback->offset;
8294         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8295         if (ret < 0) {
8296                 fprintf(stderr, "Error looking up ref %d\n", ret);
8297                 return ret;
8298         }
8299
8300         while (1) {
8301                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8302                         ret = btrfs_next_leaf(root, path);
8303                         if (ret) {
8304                                 fprintf(stderr, "Couldn't find our ref, next\n");
8305                                 return -EINVAL;
8306                         }
8307                 }
8308                 leaf = path->nodes[0];
8309                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8310                 if (key.objectid != dback->owner ||
8311                     key.type != BTRFS_EXTENT_DATA_KEY) {
8312                         fprintf(stderr, "Couldn't find our ref, search\n");
8313                         return -EINVAL;
8314                 }
8315                 fi = btrfs_item_ptr(leaf, path->slots[0],
8316                                     struct btrfs_file_extent_item);
8317                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8318                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8319
8320                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8321                         break;
8322                 path->slots[0]++;
8323         }
8324
8325         btrfs_release_path(path);
8326
8327         trans = btrfs_start_transaction(root, 1);
8328         if (IS_ERR(trans))
8329                 return PTR_ERR(trans);
8330
8331         /*
8332          * Ok we have the key of the file extent we want to fix, now we can cow
8333          * down to the thing and fix it.
8334          */
8335         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8336         if (ret < 0) {
8337                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8338                         key.objectid, key.type, key.offset, ret);
8339                 goto out;
8340         }
8341         if (ret > 0) {
8342                 fprintf(stderr, "Well that's odd, we just found this key "
8343                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8344                         key.offset);
8345                 ret = -EINVAL;
8346                 goto out;
8347         }
8348         leaf = path->nodes[0];
8349         fi = btrfs_item_ptr(leaf, path->slots[0],
8350                             struct btrfs_file_extent_item);
8351
8352         if (btrfs_file_extent_compression(leaf, fi) &&
8353             dback->disk_bytenr != entry->bytenr) {
8354                 fprintf(stderr, "Ref doesn't match the record start and is "
8355                         "compressed, please take a btrfs-image of this file "
8356                         "system and send it to a btrfs developer so they can "
8357                         "complete this functionality for bytenr %Lu\n",
8358                         dback->disk_bytenr);
8359                 ret = -EINVAL;
8360                 goto out;
8361         }
8362
8363         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8364                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8365         } else if (dback->disk_bytenr > entry->bytenr) {
8366                 u64 off_diff, offset;
8367
8368                 off_diff = dback->disk_bytenr - entry->bytenr;
8369                 offset = btrfs_file_extent_offset(leaf, fi);
8370                 if (dback->disk_bytenr + offset +
8371                     btrfs_file_extent_num_bytes(leaf, fi) >
8372                     entry->bytenr + entry->bytes) {
8373                         fprintf(stderr, "Ref is past the entry end, please "
8374                                 "take a btrfs-image of this file system and "
8375                                 "send it to a btrfs developer, ref %Lu\n",
8376                                 dback->disk_bytenr);
8377                         ret = -EINVAL;
8378                         goto out;
8379                 }
8380                 offset += off_diff;
8381                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8382                 btrfs_set_file_extent_offset(leaf, fi, offset);
8383         } else if (dback->disk_bytenr < entry->bytenr) {
8384                 u64 offset;
8385
8386                 offset = btrfs_file_extent_offset(leaf, fi);
8387                 if (dback->disk_bytenr + offset < entry->bytenr) {
8388                         fprintf(stderr, "Ref is before the entry start, please"
8389                                 " take a btrfs-image of this file system and "
8390                                 "send it to a btrfs developer, ref %Lu\n",
8391                                 dback->disk_bytenr);
8392                         ret = -EINVAL;
8393                         goto out;
8394                 }
8395
8396                 offset += dback->disk_bytenr;
8397                 offset -= entry->bytenr;
8398                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8399                 btrfs_set_file_extent_offset(leaf, fi, offset);
8400         }
8401
8402         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8403
8404         /*
8405          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8406          * only do this if we aren't using compression, otherwise it's a
8407          * trickier case.
8408          */
8409         if (!btrfs_file_extent_compression(leaf, fi))
8410                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8411         else
8412                 printf("ram bytes may be wrong?\n");
8413         btrfs_mark_buffer_dirty(leaf);
8414 out:
8415         err = btrfs_commit_transaction(trans, root);
8416         btrfs_release_path(path);
8417         return ret ? ret : err;
8418 }
8419
8420 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8421                            struct extent_record *rec)
8422 {
8423         struct extent_backref *back;
8424         struct data_backref *dback;
8425         struct extent_entry *entry, *best = NULL;
8426         LIST_HEAD(entries);
8427         int nr_entries = 0;
8428         int broken_entries = 0;
8429         int ret = 0;
8430         short mismatch = 0;
8431
8432         /*
8433          * Metadata is easy and the backrefs should always agree on bytenr and
8434          * size, if not we've got bigger issues.
8435          */
8436         if (rec->metadata)
8437                 return 0;
8438
8439         list_for_each_entry(back, &rec->backrefs, list) {
8440                 if (back->full_backref || !back->is_data)
8441                         continue;
8442
8443                 dback = to_data_backref(back);
8444
8445                 /*
8446                  * We only pay attention to backrefs that we found a real
8447                  * backref for.
8448                  */
8449                 if (dback->found_ref == 0)
8450                         continue;
8451
8452                 /*
8453                  * For now we only catch when the bytes don't match, not the
8454                  * bytenr.  We can easily do this at the same time, but I want
8455                  * to have a fs image to test on before we just add repair
8456                  * functionality willy-nilly so we know we won't screw up the
8457                  * repair.
8458                  */
8459
8460                 entry = find_entry(&entries, dback->disk_bytenr,
8461                                    dback->bytes);
8462                 if (!entry) {
8463                         entry = malloc(sizeof(struct extent_entry));
8464                         if (!entry) {
8465                                 ret = -ENOMEM;
8466                                 goto out;
8467                         }
8468                         memset(entry, 0, sizeof(*entry));
8469                         entry->bytenr = dback->disk_bytenr;
8470                         entry->bytes = dback->bytes;
8471                         list_add_tail(&entry->list, &entries);
8472                         nr_entries++;
8473                 }
8474
8475                 /*
8476                  * If we only have on entry we may think the entries agree when
8477                  * in reality they don't so we have to do some extra checking.
8478                  */
8479                 if (dback->disk_bytenr != rec->start ||
8480                     dback->bytes != rec->nr || back->broken)
8481                         mismatch = 1;
8482
8483                 if (back->broken) {
8484                         entry->broken++;
8485                         broken_entries++;
8486                 }
8487
8488                 entry->count++;
8489         }
8490
8491         /* Yay all the backrefs agree, carry on good sir */
8492         if (nr_entries <= 1 && !mismatch)
8493                 goto out;
8494
8495         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8496                 "%Lu\n", rec->start);
8497
8498         /*
8499          * First we want to see if the backrefs can agree amongst themselves who
8500          * is right, so figure out which one of the entries has the highest
8501          * count.
8502          */
8503         best = find_most_right_entry(&entries);
8504
8505         /*
8506          * Ok so we may have an even split between what the backrefs think, so
8507          * this is where we use the extent ref to see what it thinks.
8508          */
8509         if (!best) {
8510                 entry = find_entry(&entries, rec->start, rec->nr);
8511                 if (!entry && (!broken_entries || !rec->found_rec)) {
8512                         fprintf(stderr, "Backrefs don't agree with each other "
8513                                 "and extent record doesn't agree with anybody,"
8514                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8515                                 rec->start, rec->nr);
8516                         ret = -EINVAL;
8517                         goto out;
8518                 } else if (!entry) {
8519                         /*
8520                          * Ok our backrefs were broken, we'll assume this is the
8521                          * correct value and add an entry for this range.
8522                          */
8523                         entry = malloc(sizeof(struct extent_entry));
8524                         if (!entry) {
8525                                 ret = -ENOMEM;
8526                                 goto out;
8527                         }
8528                         memset(entry, 0, sizeof(*entry));
8529                         entry->bytenr = rec->start;
8530                         entry->bytes = rec->nr;
8531                         list_add_tail(&entry->list, &entries);
8532                         nr_entries++;
8533                 }
8534                 entry->count++;
8535                 best = find_most_right_entry(&entries);
8536                 if (!best) {
8537                         fprintf(stderr, "Backrefs and extent record evenly "
8538                                 "split on who is right, this is going to "
8539                                 "require user input to fix bytenr %Lu bytes "
8540                                 "%Lu\n", rec->start, rec->nr);
8541                         ret = -EINVAL;
8542                         goto out;
8543                 }
8544         }
8545
8546         /*
8547          * I don't think this can happen currently as we'll abort() if we catch
8548          * this case higher up, but in case somebody removes that we still can't
8549          * deal with it properly here yet, so just bail out of that's the case.
8550          */
8551         if (best->bytenr != rec->start) {
8552                 fprintf(stderr, "Extent start and backref starts don't match, "
8553                         "please use btrfs-image on this file system and send "
8554                         "it to a btrfs developer so they can make fsck fix "
8555                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8556                         rec->start, rec->nr);
8557                 ret = -EINVAL;
8558                 goto out;
8559         }
8560
8561         /*
8562          * Ok great we all agreed on an extent record, let's go find the real
8563          * references and fix up the ones that don't match.
8564          */
8565         list_for_each_entry(back, &rec->backrefs, list) {
8566                 if (back->full_backref || !back->is_data)
8567                         continue;
8568
8569                 dback = to_data_backref(back);
8570
8571                 /*
8572                  * Still ignoring backrefs that don't have a real ref attached
8573                  * to them.
8574                  */
8575                 if (dback->found_ref == 0)
8576                         continue;
8577
8578                 if (dback->bytes == best->bytes &&
8579                     dback->disk_bytenr == best->bytenr)
8580                         continue;
8581
8582                 ret = repair_ref(info, path, dback, best);
8583                 if (ret)
8584                         goto out;
8585         }
8586
8587         /*
8588          * Ok we messed with the actual refs, which means we need to drop our
8589          * entire cache and go back and rescan.  I know this is a huge pain and
8590          * adds a lot of extra work, but it's the only way to be safe.  Once all
8591          * the backrefs agree we may not need to do anything to the extent
8592          * record itself.
8593          */
8594         ret = -EAGAIN;
8595 out:
8596         while (!list_empty(&entries)) {
8597                 entry = list_entry(entries.next, struct extent_entry, list);
8598                 list_del_init(&entry->list);
8599                 free(entry);
8600         }
8601         return ret;
8602 }
8603
8604 static int process_duplicates(struct cache_tree *extent_cache,
8605                               struct extent_record *rec)
8606 {
8607         struct extent_record *good, *tmp;
8608         struct cache_extent *cache;
8609         int ret;
8610
8611         /*
8612          * If we found a extent record for this extent then return, or if we
8613          * have more than one duplicate we are likely going to need to delete
8614          * something.
8615          */
8616         if (rec->found_rec || rec->num_duplicates > 1)
8617                 return 0;
8618
8619         /* Shouldn't happen but just in case */
8620         BUG_ON(!rec->num_duplicates);
8621
8622         /*
8623          * So this happens if we end up with a backref that doesn't match the
8624          * actual extent entry.  So either the backref is bad or the extent
8625          * entry is bad.  Either way we want to have the extent_record actually
8626          * reflect what we found in the extent_tree, so we need to take the
8627          * duplicate out and use that as the extent_record since the only way we
8628          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8629          */
8630         remove_cache_extent(extent_cache, &rec->cache);
8631
8632         good = to_extent_record(rec->dups.next);
8633         list_del_init(&good->list);
8634         INIT_LIST_HEAD(&good->backrefs);
8635         INIT_LIST_HEAD(&good->dups);
8636         good->cache.start = good->start;
8637         good->cache.size = good->nr;
8638         good->content_checked = 0;
8639         good->owner_ref_checked = 0;
8640         good->num_duplicates = 0;
8641         good->refs = rec->refs;
8642         list_splice_init(&rec->backrefs, &good->backrefs);
8643         while (1) {
8644                 cache = lookup_cache_extent(extent_cache, good->start,
8645                                             good->nr);
8646                 if (!cache)
8647                         break;
8648                 tmp = container_of(cache, struct extent_record, cache);
8649
8650                 /*
8651                  * If we find another overlapping extent and it's found_rec is
8652                  * set then it's a duplicate and we need to try and delete
8653                  * something.
8654                  */
8655                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8656                         if (list_empty(&good->list))
8657                                 list_add_tail(&good->list,
8658                                               &duplicate_extents);
8659                         good->num_duplicates += tmp->num_duplicates + 1;
8660                         list_splice_init(&tmp->dups, &good->dups);
8661                         list_del_init(&tmp->list);
8662                         list_add_tail(&tmp->list, &good->dups);
8663                         remove_cache_extent(extent_cache, &tmp->cache);
8664                         continue;
8665                 }
8666
8667                 /*
8668                  * Ok we have another non extent item backed extent rec, so lets
8669                  * just add it to this extent and carry on like we did above.
8670                  */
8671                 good->refs += tmp->refs;
8672                 list_splice_init(&tmp->backrefs, &good->backrefs);
8673                 remove_cache_extent(extent_cache, &tmp->cache);
8674                 free(tmp);
8675         }
8676         ret = insert_cache_extent(extent_cache, &good->cache);
8677         BUG_ON(ret);
8678         free(rec);
8679         return good->num_duplicates ? 0 : 1;
8680 }
8681
8682 static int delete_duplicate_records(struct btrfs_root *root,
8683                                     struct extent_record *rec)
8684 {
8685         struct btrfs_trans_handle *trans;
8686         LIST_HEAD(delete_list);
8687         struct btrfs_path path;
8688         struct extent_record *tmp, *good, *n;
8689         int nr_del = 0;
8690         int ret = 0, err;
8691         struct btrfs_key key;
8692
8693         btrfs_init_path(&path);
8694
8695         good = rec;
8696         /* Find the record that covers all of the duplicates. */
8697         list_for_each_entry(tmp, &rec->dups, list) {
8698                 if (good->start < tmp->start)
8699                         continue;
8700                 if (good->nr > tmp->nr)
8701                         continue;
8702
8703                 if (tmp->start + tmp->nr < good->start + good->nr) {
8704                         fprintf(stderr, "Ok we have overlapping extents that "
8705                                 "aren't completely covered by each other, this "
8706                                 "is going to require more careful thought.  "
8707                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8708                                 tmp->start, tmp->nr, good->start, good->nr);
8709                         abort();
8710                 }
8711                 good = tmp;
8712         }
8713
8714         if (good != rec)
8715                 list_add_tail(&rec->list, &delete_list);
8716
8717         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8718                 if (tmp == good)
8719                         continue;
8720                 list_move_tail(&tmp->list, &delete_list);
8721         }
8722
8723         root = root->fs_info->extent_root;
8724         trans = btrfs_start_transaction(root, 1);
8725         if (IS_ERR(trans)) {
8726                 ret = PTR_ERR(trans);
8727                 goto out;
8728         }
8729
8730         list_for_each_entry(tmp, &delete_list, list) {
8731                 if (tmp->found_rec == 0)
8732                         continue;
8733                 key.objectid = tmp->start;
8734                 key.type = BTRFS_EXTENT_ITEM_KEY;
8735                 key.offset = tmp->nr;
8736
8737                 /* Shouldn't happen but just in case */
8738                 if (tmp->metadata) {
8739                         fprintf(stderr, "Well this shouldn't happen, extent "
8740                                 "record overlaps but is metadata? "
8741                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8742                         abort();
8743                 }
8744
8745                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8746                 if (ret) {
8747                         if (ret > 0)
8748                                 ret = -EINVAL;
8749                         break;
8750                 }
8751                 ret = btrfs_del_item(trans, root, &path);
8752                 if (ret)
8753                         break;
8754                 btrfs_release_path(&path);
8755                 nr_del++;
8756         }
8757         err = btrfs_commit_transaction(trans, root);
8758         if (err && !ret)
8759                 ret = err;
8760 out:
8761         while (!list_empty(&delete_list)) {
8762                 tmp = to_extent_record(delete_list.next);
8763                 list_del_init(&tmp->list);
8764                 if (tmp == rec)
8765                         continue;
8766                 free(tmp);
8767         }
8768
8769         while (!list_empty(&rec->dups)) {
8770                 tmp = to_extent_record(rec->dups.next);
8771                 list_del_init(&tmp->list);
8772                 free(tmp);
8773         }
8774
8775         btrfs_release_path(&path);
8776
8777         if (!ret && !nr_del)
8778                 rec->num_duplicates = 0;
8779
8780         return ret ? ret : nr_del;
8781 }
8782
8783 static int find_possible_backrefs(struct btrfs_fs_info *info,
8784                                   struct btrfs_path *path,
8785                                   struct cache_tree *extent_cache,
8786                                   struct extent_record *rec)
8787 {
8788         struct btrfs_root *root;
8789         struct extent_backref *back;
8790         struct data_backref *dback;
8791         struct cache_extent *cache;
8792         struct btrfs_file_extent_item *fi;
8793         struct btrfs_key key;
8794         u64 bytenr, bytes;
8795         int ret;
8796
8797         list_for_each_entry(back, &rec->backrefs, list) {
8798                 /* Don't care about full backrefs (poor unloved backrefs) */
8799                 if (back->full_backref || !back->is_data)
8800                         continue;
8801
8802                 dback = to_data_backref(back);
8803
8804                 /* We found this one, we don't need to do a lookup */
8805                 if (dback->found_ref)
8806                         continue;
8807
8808                 key.objectid = dback->root;
8809                 key.type = BTRFS_ROOT_ITEM_KEY;
8810                 key.offset = (u64)-1;
8811
8812                 root = btrfs_read_fs_root(info, &key);
8813
8814                 /* No root, definitely a bad ref, skip */
8815                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8816                         continue;
8817                 /* Other err, exit */
8818                 if (IS_ERR(root))
8819                         return PTR_ERR(root);
8820
8821                 key.objectid = dback->owner;
8822                 key.type = BTRFS_EXTENT_DATA_KEY;
8823                 key.offset = dback->offset;
8824                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8825                 if (ret) {
8826                         btrfs_release_path(path);
8827                         if (ret < 0)
8828                                 return ret;
8829                         /* Didn't find it, we can carry on */
8830                         ret = 0;
8831                         continue;
8832                 }
8833
8834                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8835                                     struct btrfs_file_extent_item);
8836                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8837                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8838                 btrfs_release_path(path);
8839                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8840                 if (cache) {
8841                         struct extent_record *tmp;
8842                         tmp = container_of(cache, struct extent_record, cache);
8843
8844                         /*
8845                          * If we found an extent record for the bytenr for this
8846                          * particular backref then we can't add it to our
8847                          * current extent record.  We only want to add backrefs
8848                          * that don't have a corresponding extent item in the
8849                          * extent tree since they likely belong to this record
8850                          * and we need to fix it if it doesn't match bytenrs.
8851                          */
8852                         if  (tmp->found_rec)
8853                                 continue;
8854                 }
8855
8856                 dback->found_ref += 1;
8857                 dback->disk_bytenr = bytenr;
8858                 dback->bytes = bytes;
8859
8860                 /*
8861                  * Set this so the verify backref code knows not to trust the
8862                  * values in this backref.
8863                  */
8864                 back->broken = 1;
8865         }
8866
8867         return 0;
8868 }
8869
8870 /*
8871  * Record orphan data ref into corresponding root.
8872  *
8873  * Return 0 if the extent item contains data ref and recorded.
8874  * Return 1 if the extent item contains no useful data ref
8875  *   On that case, it may contains only shared_dataref or metadata backref
8876  *   or the file extent exists(this should be handled by the extent bytenr
8877  *   recovery routine)
8878  * Return <0 if something goes wrong.
8879  */
8880 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8881                                       struct extent_record *rec)
8882 {
8883         struct btrfs_key key;
8884         struct btrfs_root *dest_root;
8885         struct extent_backref *back;
8886         struct data_backref *dback;
8887         struct orphan_data_extent *orphan;
8888         struct btrfs_path path;
8889         int recorded_data_ref = 0;
8890         int ret = 0;
8891
8892         if (rec->metadata)
8893                 return 1;
8894         btrfs_init_path(&path);
8895         list_for_each_entry(back, &rec->backrefs, list) {
8896                 if (back->full_backref || !back->is_data ||
8897                     !back->found_extent_tree)
8898                         continue;
8899                 dback = to_data_backref(back);
8900                 if (dback->found_ref)
8901                         continue;
8902                 key.objectid = dback->root;
8903                 key.type = BTRFS_ROOT_ITEM_KEY;
8904                 key.offset = (u64)-1;
8905
8906                 dest_root = btrfs_read_fs_root(fs_info, &key);
8907
8908                 /* For non-exist root we just skip it */
8909                 if (IS_ERR(dest_root) || !dest_root)
8910                         continue;
8911
8912                 key.objectid = dback->owner;
8913                 key.type = BTRFS_EXTENT_DATA_KEY;
8914                 key.offset = dback->offset;
8915
8916                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8917                 btrfs_release_path(&path);
8918                 /*
8919                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8920                  * we need to record it for inode/file extent rebuild.
8921                  * For ret > 0, we record it only for file extent rebuild.
8922                  * For ret == 0, the file extent exists but only bytenr
8923                  * mismatch, let the original bytenr fix routine to handle,
8924                  * don't record it.
8925                  */
8926                 if (ret == 0)
8927                         continue;
8928                 ret = 0;
8929                 orphan = malloc(sizeof(*orphan));
8930                 if (!orphan) {
8931                         ret = -ENOMEM;
8932                         goto out;
8933                 }
8934                 INIT_LIST_HEAD(&orphan->list);
8935                 orphan->root = dback->root;
8936                 orphan->objectid = dback->owner;
8937                 orphan->offset = dback->offset;
8938                 orphan->disk_bytenr = rec->cache.start;
8939                 orphan->disk_len = rec->cache.size;
8940                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8941                 recorded_data_ref = 1;
8942         }
8943 out:
8944         btrfs_release_path(&path);
8945         if (!ret)
8946                 return !recorded_data_ref;
8947         else
8948                 return ret;
8949 }
8950
8951 /*
8952  * when an incorrect extent item is found, this will delete
8953  * all of the existing entries for it and recreate them
8954  * based on what the tree scan found.
8955  */
8956 static int fixup_extent_refs(struct btrfs_fs_info *info,
8957                              struct cache_tree *extent_cache,
8958                              struct extent_record *rec)
8959 {
8960         struct btrfs_trans_handle *trans = NULL;
8961         int ret;
8962         struct btrfs_path path;
8963         struct list_head *cur = rec->backrefs.next;
8964         struct cache_extent *cache;
8965         struct extent_backref *back;
8966         int allocated = 0;
8967         u64 flags = 0;
8968
8969         if (rec->flag_block_full_backref)
8970                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8971
8972         btrfs_init_path(&path);
8973         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8974                 /*
8975                  * Sometimes the backrefs themselves are so broken they don't
8976                  * get attached to any meaningful rec, so first go back and
8977                  * check any of our backrefs that we couldn't find and throw
8978                  * them into the list if we find the backref so that
8979                  * verify_backrefs can figure out what to do.
8980                  */
8981                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8982                 if (ret < 0)
8983                         goto out;
8984         }
8985
8986         /* step one, make sure all of the backrefs agree */
8987         ret = verify_backrefs(info, &path, rec);
8988         if (ret < 0)
8989                 goto out;
8990
8991         trans = btrfs_start_transaction(info->extent_root, 1);
8992         if (IS_ERR(trans)) {
8993                 ret = PTR_ERR(trans);
8994                 goto out;
8995         }
8996
8997         /* step two, delete all the existing records */
8998         ret = delete_extent_records(trans, info->extent_root, &path,
8999                                     rec->start);
9000
9001         if (ret < 0)
9002                 goto out;
9003
9004         /* was this block corrupt?  If so, don't add references to it */
9005         cache = lookup_cache_extent(info->corrupt_blocks,
9006                                     rec->start, rec->max_size);
9007         if (cache) {
9008                 ret = 0;
9009                 goto out;
9010         }
9011
9012         /* step three, recreate all the refs we did find */
9013         while(cur != &rec->backrefs) {
9014                 back = to_extent_backref(cur);
9015                 cur = cur->next;
9016
9017                 /*
9018                  * if we didn't find any references, don't create a
9019                  * new extent record
9020                  */
9021                 if (!back->found_ref)
9022                         continue;
9023
9024                 rec->bad_full_backref = 0;
9025                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9026                 allocated = 1;
9027
9028                 if (ret)
9029                         goto out;
9030         }
9031 out:
9032         if (trans) {
9033                 int err = btrfs_commit_transaction(trans, info->extent_root);
9034                 if (!ret)
9035                         ret = err;
9036         }
9037
9038         if (!ret)
9039                 fprintf(stderr, "Repaired extent references for %llu\n",
9040                                 (unsigned long long)rec->start);
9041
9042         btrfs_release_path(&path);
9043         return ret;
9044 }
9045
9046 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9047                               struct extent_record *rec)
9048 {
9049         struct btrfs_trans_handle *trans;
9050         struct btrfs_root *root = fs_info->extent_root;
9051         struct btrfs_path path;
9052         struct btrfs_extent_item *ei;
9053         struct btrfs_key key;
9054         u64 flags;
9055         int ret = 0;
9056
9057         key.objectid = rec->start;
9058         if (rec->metadata) {
9059                 key.type = BTRFS_METADATA_ITEM_KEY;
9060                 key.offset = rec->info_level;
9061         } else {
9062                 key.type = BTRFS_EXTENT_ITEM_KEY;
9063                 key.offset = rec->max_size;
9064         }
9065
9066         trans = btrfs_start_transaction(root, 0);
9067         if (IS_ERR(trans))
9068                 return PTR_ERR(trans);
9069
9070         btrfs_init_path(&path);
9071         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9072         if (ret < 0) {
9073                 btrfs_release_path(&path);
9074                 btrfs_commit_transaction(trans, root);
9075                 return ret;
9076         } else if (ret) {
9077                 fprintf(stderr, "Didn't find extent for %llu\n",
9078                         (unsigned long long)rec->start);
9079                 btrfs_release_path(&path);
9080                 btrfs_commit_transaction(trans, root);
9081                 return -ENOENT;
9082         }
9083
9084         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9085                             struct btrfs_extent_item);
9086         flags = btrfs_extent_flags(path.nodes[0], ei);
9087         if (rec->flag_block_full_backref) {
9088                 fprintf(stderr, "setting full backref on %llu\n",
9089                         (unsigned long long)key.objectid);
9090                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9091         } else {
9092                 fprintf(stderr, "clearing full backref on %llu\n",
9093                         (unsigned long long)key.objectid);
9094                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9095         }
9096         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9097         btrfs_mark_buffer_dirty(path.nodes[0]);
9098         btrfs_release_path(&path);
9099         ret = btrfs_commit_transaction(trans, root);
9100         if (!ret)
9101                 fprintf(stderr, "Repaired extent flags for %llu\n",
9102                                 (unsigned long long)rec->start);
9103
9104         return ret;
9105 }
9106
9107 /* right now we only prune from the extent allocation tree */
9108 static int prune_one_block(struct btrfs_trans_handle *trans,
9109                            struct btrfs_fs_info *info,
9110                            struct btrfs_corrupt_block *corrupt)
9111 {
9112         int ret;
9113         struct btrfs_path path;
9114         struct extent_buffer *eb;
9115         u64 found;
9116         int slot;
9117         int nritems;
9118         int level = corrupt->level + 1;
9119
9120         btrfs_init_path(&path);
9121 again:
9122         /* we want to stop at the parent to our busted block */
9123         path.lowest_level = level;
9124
9125         ret = btrfs_search_slot(trans, info->extent_root,
9126                                 &corrupt->key, &path, -1, 1);
9127
9128         if (ret < 0)
9129                 goto out;
9130
9131         eb = path.nodes[level];
9132         if (!eb) {
9133                 ret = -ENOENT;
9134                 goto out;
9135         }
9136
9137         /*
9138          * hopefully the search gave us the block we want to prune,
9139          * lets try that first
9140          */
9141         slot = path.slots[level];
9142         found =  btrfs_node_blockptr(eb, slot);
9143         if (found == corrupt->cache.start)
9144                 goto del_ptr;
9145
9146         nritems = btrfs_header_nritems(eb);
9147
9148         /* the search failed, lets scan this node and hope we find it */
9149         for (slot = 0; slot < nritems; slot++) {
9150                 found =  btrfs_node_blockptr(eb, slot);
9151                 if (found == corrupt->cache.start)
9152                         goto del_ptr;
9153         }
9154         /*
9155          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9156          * to this block
9157          */
9158         if (eb == info->extent_root->node) {
9159                 ret = -ENOENT;
9160                 goto out;
9161         } else {
9162                 level++;
9163                 btrfs_release_path(&path);
9164                 goto again;
9165         }
9166
9167 del_ptr:
9168         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9169         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9170
9171 out:
9172         btrfs_release_path(&path);
9173         return ret;
9174 }
9175
9176 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9177 {
9178         struct btrfs_trans_handle *trans = NULL;
9179         struct cache_extent *cache;
9180         struct btrfs_corrupt_block *corrupt;
9181
9182         while (1) {
9183                 cache = search_cache_extent(info->corrupt_blocks, 0);
9184                 if (!cache)
9185                         break;
9186                 if (!trans) {
9187                         trans = btrfs_start_transaction(info->extent_root, 1);
9188                         if (IS_ERR(trans))
9189                                 return PTR_ERR(trans);
9190                 }
9191                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9192                 prune_one_block(trans, info, corrupt);
9193                 remove_cache_extent(info->corrupt_blocks, cache);
9194         }
9195         if (trans)
9196                 return btrfs_commit_transaction(trans, info->extent_root);
9197         return 0;
9198 }
9199
9200 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9201 {
9202         struct btrfs_block_group_cache *cache;
9203         u64 start, end;
9204         int ret;
9205
9206         while (1) {
9207                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9208                                             &start, &end, EXTENT_DIRTY);
9209                 if (ret)
9210                         break;
9211                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9212         }
9213
9214         start = 0;
9215         while (1) {
9216                 cache = btrfs_lookup_first_block_group(fs_info, start);
9217                 if (!cache)
9218                         break;
9219                 if (cache->cached)
9220                         cache->cached = 0;
9221                 start = cache->key.objectid + cache->key.offset;
9222         }
9223 }
9224
9225 static int check_extent_refs(struct btrfs_root *root,
9226                              struct cache_tree *extent_cache)
9227 {
9228         struct extent_record *rec;
9229         struct cache_extent *cache;
9230         int ret = 0;
9231         int had_dups = 0;
9232
9233         if (repair) {
9234                 /*
9235                  * if we're doing a repair, we have to make sure
9236                  * we don't allocate from the problem extents.
9237                  * In the worst case, this will be all the
9238                  * extents in the FS
9239                  */
9240                 cache = search_cache_extent(extent_cache, 0);
9241                 while(cache) {
9242                         rec = container_of(cache, struct extent_record, cache);
9243                         set_extent_dirty(root->fs_info->excluded_extents,
9244                                          rec->start,
9245                                          rec->start + rec->max_size - 1);
9246                         cache = next_cache_extent(cache);
9247                 }
9248
9249                 /* pin down all the corrupted blocks too */
9250                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9251                 while(cache) {
9252                         set_extent_dirty(root->fs_info->excluded_extents,
9253                                          cache->start,
9254                                          cache->start + cache->size - 1);
9255                         cache = next_cache_extent(cache);
9256                 }
9257                 prune_corrupt_blocks(root->fs_info);
9258                 reset_cached_block_groups(root->fs_info);
9259         }
9260
9261         reset_cached_block_groups(root->fs_info);
9262
9263         /*
9264          * We need to delete any duplicate entries we find first otherwise we
9265          * could mess up the extent tree when we have backrefs that actually
9266          * belong to a different extent item and not the weird duplicate one.
9267          */
9268         while (repair && !list_empty(&duplicate_extents)) {
9269                 rec = to_extent_record(duplicate_extents.next);
9270                 list_del_init(&rec->list);
9271
9272                 /* Sometimes we can find a backref before we find an actual
9273                  * extent, so we need to process it a little bit to see if there
9274                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9275                  * if this is a backref screwup.  If we need to delete stuff
9276                  * process_duplicates() will return 0, otherwise it will return
9277                  * 1 and we
9278                  */
9279                 if (process_duplicates(extent_cache, rec))
9280                         continue;
9281                 ret = delete_duplicate_records(root, rec);
9282                 if (ret < 0)
9283                         return ret;
9284                 /*
9285                  * delete_duplicate_records will return the number of entries
9286                  * deleted, so if it's greater than 0 then we know we actually
9287                  * did something and we need to remove.
9288                  */
9289                 if (ret)
9290                         had_dups = 1;
9291         }
9292
9293         if (had_dups)
9294                 return -EAGAIN;
9295
9296         while(1) {
9297                 int cur_err = 0;
9298                 int fix = 0;
9299
9300                 cache = search_cache_extent(extent_cache, 0);
9301                 if (!cache)
9302                         break;
9303                 rec = container_of(cache, struct extent_record, cache);
9304                 if (rec->num_duplicates) {
9305                         fprintf(stderr, "extent item %llu has multiple extent "
9306                                 "items\n", (unsigned long long)rec->start);
9307                         cur_err = 1;
9308                 }
9309
9310                 if (rec->refs != rec->extent_item_refs) {
9311                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9312                                 (unsigned long long)rec->start,
9313                                 (unsigned long long)rec->nr);
9314                         fprintf(stderr, "extent item %llu, found %llu\n",
9315                                 (unsigned long long)rec->extent_item_refs,
9316                                 (unsigned long long)rec->refs);
9317                         ret = record_orphan_data_extents(root->fs_info, rec);
9318                         if (ret < 0)
9319                                 goto repair_abort;
9320                         fix = ret;
9321                         cur_err = 1;
9322                 }
9323                 if (all_backpointers_checked(rec, 1)) {
9324                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9325                                 (unsigned long long)rec->start,
9326                                 (unsigned long long)rec->nr);
9327                         fix = 1;
9328                         cur_err = 1;
9329                 }
9330                 if (!rec->owner_ref_checked) {
9331                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9332                                 (unsigned long long)rec->start,
9333                                 (unsigned long long)rec->nr);
9334                         fix = 1;
9335                         cur_err = 1;
9336                 }
9337
9338                 if (repair && fix) {
9339                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9340                         if (ret)
9341                                 goto repair_abort;
9342                 }
9343
9344
9345                 if (rec->bad_full_backref) {
9346                         fprintf(stderr, "bad full backref, on [%llu]\n",
9347                                 (unsigned long long)rec->start);
9348                         if (repair) {
9349                                 ret = fixup_extent_flags(root->fs_info, rec);
9350                                 if (ret)
9351                                         goto repair_abort;
9352                                 fix = 1;
9353                         }
9354                         cur_err = 1;
9355                 }
9356                 /*
9357                  * Although it's not a extent ref's problem, we reuse this
9358                  * routine for error reporting.
9359                  * No repair function yet.
9360                  */
9361                 if (rec->crossing_stripes) {
9362                         fprintf(stderr,
9363                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9364                                 rec->start, rec->start + rec->max_size);
9365                         cur_err = 1;
9366                 }
9367
9368                 if (rec->wrong_chunk_type) {
9369                         fprintf(stderr,
9370                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9371                                 rec->start, rec->start + rec->max_size);
9372                         cur_err = 1;
9373                 }
9374
9375                 remove_cache_extent(extent_cache, cache);
9376                 free_all_extent_backrefs(rec);
9377                 if (!init_extent_tree && repair && (!cur_err || fix))
9378                         clear_extent_dirty(root->fs_info->excluded_extents,
9379                                            rec->start,
9380                                            rec->start + rec->max_size - 1);
9381                 free(rec);
9382         }
9383 repair_abort:
9384         if (repair) {
9385                 if (ret && ret != -EAGAIN) {
9386                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9387                         exit(1);
9388                 } else if (!ret) {
9389                         struct btrfs_trans_handle *trans;
9390
9391                         root = root->fs_info->extent_root;
9392                         trans = btrfs_start_transaction(root, 1);
9393                         if (IS_ERR(trans)) {
9394                                 ret = PTR_ERR(trans);
9395                                 goto repair_abort;
9396                         }
9397
9398                         btrfs_fix_block_accounting(trans, root);
9399                         ret = btrfs_commit_transaction(trans, root);
9400                         if (ret)
9401                                 goto repair_abort;
9402                 }
9403                 return ret;
9404         }
9405         return 0;
9406 }
9407
9408 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9409 {
9410         u64 stripe_size;
9411
9412         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9413                 stripe_size = length;
9414                 stripe_size /= num_stripes;
9415         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9416                 stripe_size = length * 2;
9417                 stripe_size /= num_stripes;
9418         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9419                 stripe_size = length;
9420                 stripe_size /= (num_stripes - 1);
9421         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9422                 stripe_size = length;
9423                 stripe_size /= (num_stripes - 2);
9424         } else {
9425                 stripe_size = length;
9426         }
9427         return stripe_size;
9428 }
9429
9430 /*
9431  * Check the chunk with its block group/dev list ref:
9432  * Return 0 if all refs seems valid.
9433  * Return 1 if part of refs seems valid, need later check for rebuild ref
9434  * like missing block group and needs to search extent tree to rebuild them.
9435  * Return -1 if essential refs are missing and unable to rebuild.
9436  */
9437 static int check_chunk_refs(struct chunk_record *chunk_rec,
9438                             struct block_group_tree *block_group_cache,
9439                             struct device_extent_tree *dev_extent_cache,
9440                             int silent)
9441 {
9442         struct cache_extent *block_group_item;
9443         struct block_group_record *block_group_rec;
9444         struct cache_extent *dev_extent_item;
9445         struct device_extent_record *dev_extent_rec;
9446         u64 devid;
9447         u64 offset;
9448         u64 length;
9449         int metadump_v2 = 0;
9450         int i;
9451         int ret = 0;
9452
9453         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9454                                                chunk_rec->offset,
9455                                                chunk_rec->length);
9456         if (block_group_item) {
9457                 block_group_rec = container_of(block_group_item,
9458                                                struct block_group_record,
9459                                                cache);
9460                 if (chunk_rec->length != block_group_rec->offset ||
9461                     chunk_rec->offset != block_group_rec->objectid ||
9462                     (!metadump_v2 &&
9463                      chunk_rec->type_flags != block_group_rec->flags)) {
9464                         if (!silent)
9465                                 fprintf(stderr,
9466                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9467                                         chunk_rec->objectid,
9468                                         chunk_rec->type,
9469                                         chunk_rec->offset,
9470                                         chunk_rec->length,
9471                                         chunk_rec->offset,
9472                                         chunk_rec->type_flags,
9473                                         block_group_rec->objectid,
9474                                         block_group_rec->type,
9475                                         block_group_rec->offset,
9476                                         block_group_rec->offset,
9477                                         block_group_rec->objectid,
9478                                         block_group_rec->flags);
9479                         ret = -1;
9480                 } else {
9481                         list_del_init(&block_group_rec->list);
9482                         chunk_rec->bg_rec = block_group_rec;
9483                 }
9484         } else {
9485                 if (!silent)
9486                         fprintf(stderr,
9487                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9488                                 chunk_rec->objectid,
9489                                 chunk_rec->type,
9490                                 chunk_rec->offset,
9491                                 chunk_rec->length,
9492                                 chunk_rec->offset,
9493                                 chunk_rec->type_flags);
9494                 ret = 1;
9495         }
9496
9497         if (metadump_v2)
9498                 return ret;
9499
9500         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9501                                     chunk_rec->num_stripes);
9502         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9503                 devid = chunk_rec->stripes[i].devid;
9504                 offset = chunk_rec->stripes[i].offset;
9505                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9506                                                        devid, offset, length);
9507                 if (dev_extent_item) {
9508                         dev_extent_rec = container_of(dev_extent_item,
9509                                                 struct device_extent_record,
9510                                                 cache);
9511                         if (dev_extent_rec->objectid != devid ||
9512                             dev_extent_rec->offset != offset ||
9513                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9514                             dev_extent_rec->length != length) {
9515                                 if (!silent)
9516                                         fprintf(stderr,
9517                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9518                                                 chunk_rec->objectid,
9519                                                 chunk_rec->type,
9520                                                 chunk_rec->offset,
9521                                                 chunk_rec->stripes[i].devid,
9522                                                 chunk_rec->stripes[i].offset,
9523                                                 dev_extent_rec->objectid,
9524                                                 dev_extent_rec->offset,
9525                                                 dev_extent_rec->length);
9526                                 ret = -1;
9527                         } else {
9528                                 list_move(&dev_extent_rec->chunk_list,
9529                                           &chunk_rec->dextents);
9530                         }
9531                 } else {
9532                         if (!silent)
9533                                 fprintf(stderr,
9534                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9535                                         chunk_rec->objectid,
9536                                         chunk_rec->type,
9537                                         chunk_rec->offset,
9538                                         chunk_rec->stripes[i].devid,
9539                                         chunk_rec->stripes[i].offset);
9540                         ret = -1;
9541                 }
9542         }
9543         return ret;
9544 }
9545
9546 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9547 int check_chunks(struct cache_tree *chunk_cache,
9548                  struct block_group_tree *block_group_cache,
9549                  struct device_extent_tree *dev_extent_cache,
9550                  struct list_head *good, struct list_head *bad,
9551                  struct list_head *rebuild, int silent)
9552 {
9553         struct cache_extent *chunk_item;
9554         struct chunk_record *chunk_rec;
9555         struct block_group_record *bg_rec;
9556         struct device_extent_record *dext_rec;
9557         int err;
9558         int ret = 0;
9559
9560         chunk_item = first_cache_extent(chunk_cache);
9561         while (chunk_item) {
9562                 chunk_rec = container_of(chunk_item, struct chunk_record,
9563                                          cache);
9564                 err = check_chunk_refs(chunk_rec, block_group_cache,
9565                                        dev_extent_cache, silent);
9566                 if (err < 0)
9567                         ret = err;
9568                 if (err == 0 && good)
9569                         list_add_tail(&chunk_rec->list, good);
9570                 if (err > 0 && rebuild)
9571                         list_add_tail(&chunk_rec->list, rebuild);
9572                 if (err < 0 && bad)
9573                         list_add_tail(&chunk_rec->list, bad);
9574                 chunk_item = next_cache_extent(chunk_item);
9575         }
9576
9577         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9578                 if (!silent)
9579                         fprintf(stderr,
9580                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9581                                 bg_rec->objectid,
9582                                 bg_rec->offset,
9583                                 bg_rec->flags);
9584                 if (!ret)
9585                         ret = 1;
9586         }
9587
9588         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9589                             chunk_list) {
9590                 if (!silent)
9591                         fprintf(stderr,
9592                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9593                                 dext_rec->objectid,
9594                                 dext_rec->offset,
9595                                 dext_rec->length);
9596                 if (!ret)
9597                         ret = 1;
9598         }
9599         return ret;
9600 }
9601
9602
9603 static int check_device_used(struct device_record *dev_rec,
9604                              struct device_extent_tree *dext_cache)
9605 {
9606         struct cache_extent *cache;
9607         struct device_extent_record *dev_extent_rec;
9608         u64 total_byte = 0;
9609
9610         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9611         while (cache) {
9612                 dev_extent_rec = container_of(cache,
9613                                               struct device_extent_record,
9614                                               cache);
9615                 if (dev_extent_rec->objectid != dev_rec->devid)
9616                         break;
9617
9618                 list_del_init(&dev_extent_rec->device_list);
9619                 total_byte += dev_extent_rec->length;
9620                 cache = next_cache_extent(cache);
9621         }
9622
9623         if (total_byte != dev_rec->byte_used) {
9624                 fprintf(stderr,
9625                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9626                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9627                         dev_rec->type, dev_rec->offset);
9628                 return -1;
9629         } else {
9630                 return 0;
9631         }
9632 }
9633
9634 /* check btrfs_dev_item -> btrfs_dev_extent */
9635 static int check_devices(struct rb_root *dev_cache,
9636                          struct device_extent_tree *dev_extent_cache)
9637 {
9638         struct rb_node *dev_node;
9639         struct device_record *dev_rec;
9640         struct device_extent_record *dext_rec;
9641         int err;
9642         int ret = 0;
9643
9644         dev_node = rb_first(dev_cache);
9645         while (dev_node) {
9646                 dev_rec = container_of(dev_node, struct device_record, node);
9647                 err = check_device_used(dev_rec, dev_extent_cache);
9648                 if (err)
9649                         ret = err;
9650
9651                 dev_node = rb_next(dev_node);
9652         }
9653         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9654                             device_list) {
9655                 fprintf(stderr,
9656                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9657                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9658                 if (!ret)
9659                         ret = 1;
9660         }
9661         return ret;
9662 }
9663
9664 static int add_root_item_to_list(struct list_head *head,
9665                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9666                                   u8 level, u8 drop_level,
9667                                   int level_size, struct btrfs_key *drop_key)
9668 {
9669
9670         struct root_item_record *ri_rec;
9671         ri_rec = malloc(sizeof(*ri_rec));
9672         if (!ri_rec)
9673                 return -ENOMEM;
9674         ri_rec->bytenr = bytenr;
9675         ri_rec->objectid = objectid;
9676         ri_rec->level = level;
9677         ri_rec->level_size = level_size;
9678         ri_rec->drop_level = drop_level;
9679         ri_rec->last_snapshot = last_snapshot;
9680         if (drop_key)
9681                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9682         list_add_tail(&ri_rec->list, head);
9683
9684         return 0;
9685 }
9686
9687 static void free_root_item_list(struct list_head *list)
9688 {
9689         struct root_item_record *ri_rec;
9690
9691         while (!list_empty(list)) {
9692                 ri_rec = list_first_entry(list, struct root_item_record,
9693                                           list);
9694                 list_del_init(&ri_rec->list);
9695                 free(ri_rec);
9696         }
9697 }
9698
9699 static int deal_root_from_list(struct list_head *list,
9700                                struct btrfs_root *root,
9701                                struct block_info *bits,
9702                                int bits_nr,
9703                                struct cache_tree *pending,
9704                                struct cache_tree *seen,
9705                                struct cache_tree *reada,
9706                                struct cache_tree *nodes,
9707                                struct cache_tree *extent_cache,
9708                                struct cache_tree *chunk_cache,
9709                                struct rb_root *dev_cache,
9710                                struct block_group_tree *block_group_cache,
9711                                struct device_extent_tree *dev_extent_cache)
9712 {
9713         int ret = 0;
9714         u64 last;
9715
9716         while (!list_empty(list)) {
9717                 struct root_item_record *rec;
9718                 struct extent_buffer *buf;
9719                 rec = list_entry(list->next,
9720                                  struct root_item_record, list);
9721                 last = 0;
9722                 buf = read_tree_block(root->fs_info->tree_root,
9723                                       rec->bytenr, rec->level_size, 0);
9724                 if (!extent_buffer_uptodate(buf)) {
9725                         free_extent_buffer(buf);
9726                         ret = -EIO;
9727                         break;
9728                 }
9729                 ret = add_root_to_pending(buf, extent_cache, pending,
9730                                     seen, nodes, rec->objectid);
9731                 if (ret < 0)
9732                         break;
9733                 /*
9734                  * To rebuild extent tree, we need deal with snapshot
9735                  * one by one, otherwise we deal with node firstly which
9736                  * can maximize readahead.
9737                  */
9738                 while (1) {
9739                         ret = run_next_block(root, bits, bits_nr, &last,
9740                                              pending, seen, reada, nodes,
9741                                              extent_cache, chunk_cache,
9742                                              dev_cache, block_group_cache,
9743                                              dev_extent_cache, rec);
9744                         if (ret != 0)
9745                                 break;
9746                 }
9747                 free_extent_buffer(buf);
9748                 list_del(&rec->list);
9749                 free(rec);
9750                 if (ret < 0)
9751                         break;
9752         }
9753         while (ret >= 0) {
9754                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9755                                      reada, nodes, extent_cache, chunk_cache,
9756                                      dev_cache, block_group_cache,
9757                                      dev_extent_cache, NULL);
9758                 if (ret != 0) {
9759                         if (ret > 0)
9760                                 ret = 0;
9761                         break;
9762                 }
9763         }
9764         return ret;
9765 }
9766
9767 static int check_chunks_and_extents(struct btrfs_root *root)
9768 {
9769         struct rb_root dev_cache;
9770         struct cache_tree chunk_cache;
9771         struct block_group_tree block_group_cache;
9772         struct device_extent_tree dev_extent_cache;
9773         struct cache_tree extent_cache;
9774         struct cache_tree seen;
9775         struct cache_tree pending;
9776         struct cache_tree reada;
9777         struct cache_tree nodes;
9778         struct extent_io_tree excluded_extents;
9779         struct cache_tree corrupt_blocks;
9780         struct btrfs_path path;
9781         struct btrfs_key key;
9782         struct btrfs_key found_key;
9783         int ret, err = 0;
9784         struct block_info *bits;
9785         int bits_nr;
9786         struct extent_buffer *leaf;
9787         int slot;
9788         struct btrfs_root_item ri;
9789         struct list_head dropping_trees;
9790         struct list_head normal_trees;
9791         struct btrfs_root *root1;
9792         u64 objectid;
9793         u32 level_size;
9794         u8 level;
9795
9796         dev_cache = RB_ROOT;
9797         cache_tree_init(&chunk_cache);
9798         block_group_tree_init(&block_group_cache);
9799         device_extent_tree_init(&dev_extent_cache);
9800
9801         cache_tree_init(&extent_cache);
9802         cache_tree_init(&seen);
9803         cache_tree_init(&pending);
9804         cache_tree_init(&nodes);
9805         cache_tree_init(&reada);
9806         cache_tree_init(&corrupt_blocks);
9807         extent_io_tree_init(&excluded_extents);
9808         INIT_LIST_HEAD(&dropping_trees);
9809         INIT_LIST_HEAD(&normal_trees);
9810
9811         if (repair) {
9812                 root->fs_info->excluded_extents = &excluded_extents;
9813                 root->fs_info->fsck_extent_cache = &extent_cache;
9814                 root->fs_info->free_extent_hook = free_extent_hook;
9815                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9816         }
9817
9818         bits_nr = 1024;
9819         bits = malloc(bits_nr * sizeof(struct block_info));
9820         if (!bits) {
9821                 perror("malloc");
9822                 exit(1);
9823         }
9824
9825         if (ctx.progress_enabled) {
9826                 ctx.tp = TASK_EXTENTS;
9827                 task_start(ctx.info);
9828         }
9829
9830 again:
9831         root1 = root->fs_info->tree_root;
9832         level = btrfs_header_level(root1->node);
9833         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9834                                     root1->node->start, 0, level, 0,
9835                                     root1->nodesize, NULL);
9836         if (ret < 0)
9837                 goto out;
9838         root1 = root->fs_info->chunk_root;
9839         level = btrfs_header_level(root1->node);
9840         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9841                                     root1->node->start, 0, level, 0,
9842                                     root1->nodesize, NULL);
9843         if (ret < 0)
9844                 goto out;
9845         btrfs_init_path(&path);
9846         key.offset = 0;
9847         key.objectid = 0;
9848         key.type = BTRFS_ROOT_ITEM_KEY;
9849         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9850                                         &key, &path, 0, 0);
9851         if (ret < 0)
9852                 goto out;
9853         while(1) {
9854                 leaf = path.nodes[0];
9855                 slot = path.slots[0];
9856                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9857                         ret = btrfs_next_leaf(root, &path);
9858                         if (ret != 0)
9859                                 break;
9860                         leaf = path.nodes[0];
9861                         slot = path.slots[0];
9862                 }
9863                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9864                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9865                         unsigned long offset;
9866                         u64 last_snapshot;
9867
9868                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9869                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9870                         last_snapshot = btrfs_root_last_snapshot(&ri);
9871                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9872                                 level = btrfs_root_level(&ri);
9873                                 level_size = root->nodesize;
9874                                 ret = add_root_item_to_list(&normal_trees,
9875                                                 found_key.objectid,
9876                                                 btrfs_root_bytenr(&ri),
9877                                                 last_snapshot, level,
9878                                                 0, level_size, NULL);
9879                                 if (ret < 0)
9880                                         goto out;
9881                         } else {
9882                                 level = btrfs_root_level(&ri);
9883                                 level_size = root->nodesize;
9884                                 objectid = found_key.objectid;
9885                                 btrfs_disk_key_to_cpu(&found_key,
9886                                                       &ri.drop_progress);
9887                                 ret = add_root_item_to_list(&dropping_trees,
9888                                                 objectid,
9889                                                 btrfs_root_bytenr(&ri),
9890                                                 last_snapshot, level,
9891                                                 ri.drop_level,
9892                                                 level_size, &found_key);
9893                                 if (ret < 0)
9894                                         goto out;
9895                         }
9896                 }
9897                 path.slots[0]++;
9898         }
9899         btrfs_release_path(&path);
9900
9901         /*
9902          * check_block can return -EAGAIN if it fixes something, please keep
9903          * this in mind when dealing with return values from these functions, if
9904          * we get -EAGAIN we want to fall through and restart the loop.
9905          */
9906         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9907                                   &seen, &reada, &nodes, &extent_cache,
9908                                   &chunk_cache, &dev_cache, &block_group_cache,
9909                                   &dev_extent_cache);
9910         if (ret < 0) {
9911                 if (ret == -EAGAIN)
9912                         goto loop;
9913                 goto out;
9914         }
9915         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9916                                   &pending, &seen, &reada, &nodes,
9917                                   &extent_cache, &chunk_cache, &dev_cache,
9918                                   &block_group_cache, &dev_extent_cache);
9919         if (ret < 0) {
9920                 if (ret == -EAGAIN)
9921                         goto loop;
9922                 goto out;
9923         }
9924
9925         ret = check_chunks(&chunk_cache, &block_group_cache,
9926                            &dev_extent_cache, NULL, NULL, NULL, 0);
9927         if (ret) {
9928                 if (ret == -EAGAIN)
9929                         goto loop;
9930                 err = ret;
9931         }
9932
9933         ret = check_extent_refs(root, &extent_cache);
9934         if (ret < 0) {
9935                 if (ret == -EAGAIN)
9936                         goto loop;
9937                 goto out;
9938         }
9939
9940         ret = check_devices(&dev_cache, &dev_extent_cache);
9941         if (ret && err)
9942                 ret = err;
9943
9944 out:
9945         task_stop(ctx.info);
9946         if (repair) {
9947                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9948                 extent_io_tree_cleanup(&excluded_extents);
9949                 root->fs_info->fsck_extent_cache = NULL;
9950                 root->fs_info->free_extent_hook = NULL;
9951                 root->fs_info->corrupt_blocks = NULL;
9952                 root->fs_info->excluded_extents = NULL;
9953         }
9954         free(bits);
9955         free_chunk_cache_tree(&chunk_cache);
9956         free_device_cache_tree(&dev_cache);
9957         free_block_group_tree(&block_group_cache);
9958         free_device_extent_tree(&dev_extent_cache);
9959         free_extent_cache_tree(&seen);
9960         free_extent_cache_tree(&pending);
9961         free_extent_cache_tree(&reada);
9962         free_extent_cache_tree(&nodes);
9963         return ret;
9964 loop:
9965         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9966         free_extent_cache_tree(&seen);
9967         free_extent_cache_tree(&pending);
9968         free_extent_cache_tree(&reada);
9969         free_extent_cache_tree(&nodes);
9970         free_chunk_cache_tree(&chunk_cache);
9971         free_block_group_tree(&block_group_cache);
9972         free_device_cache_tree(&dev_cache);
9973         free_device_extent_tree(&dev_extent_cache);
9974         free_extent_record_cache(&extent_cache);
9975         free_root_item_list(&normal_trees);
9976         free_root_item_list(&dropping_trees);
9977         extent_io_tree_cleanup(&excluded_extents);
9978         goto again;
9979 }
9980
9981 /*
9982  * Check backrefs of a tree block given by @bytenr or @eb.
9983  *
9984  * @root:       the root containing the @bytenr or @eb
9985  * @eb:         tree block extent buffer, can be NULL
9986  * @bytenr:     bytenr of the tree block to search
9987  * @level:      tree level of the tree block
9988  * @owner:      owner of the tree block
9989  *
9990  * Return >0 for any error found and output error message
9991  * Return 0 for no error found
9992  */
9993 static int check_tree_block_ref(struct btrfs_root *root,
9994                                 struct extent_buffer *eb, u64 bytenr,
9995                                 int level, u64 owner)
9996 {
9997         struct btrfs_key key;
9998         struct btrfs_root *extent_root = root->fs_info->extent_root;
9999         struct btrfs_path path;
10000         struct btrfs_extent_item *ei;
10001         struct btrfs_extent_inline_ref *iref;
10002         struct extent_buffer *leaf;
10003         unsigned long end;
10004         unsigned long ptr;
10005         int slot;
10006         int skinny_level;
10007         int type;
10008         u32 nodesize = root->nodesize;
10009         u32 item_size;
10010         u64 offset;
10011         int tree_reloc_root = 0;
10012         int found_ref = 0;
10013         int err = 0;
10014         int ret;
10015
10016         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10017             btrfs_header_bytenr(root->node) == bytenr)
10018                 tree_reloc_root = 1;
10019
10020         btrfs_init_path(&path);
10021         key.objectid = bytenr;
10022         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10023                 key.type = BTRFS_METADATA_ITEM_KEY;
10024         else
10025                 key.type = BTRFS_EXTENT_ITEM_KEY;
10026         key.offset = (u64)-1;
10027
10028         /* Search for the backref in extent tree */
10029         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10030         if (ret < 0) {
10031                 err |= BACKREF_MISSING;
10032                 goto out;
10033         }
10034         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10035         if (ret) {
10036                 err |= BACKREF_MISSING;
10037                 goto out;
10038         }
10039
10040         leaf = path.nodes[0];
10041         slot = path.slots[0];
10042         btrfs_item_key_to_cpu(leaf, &key, slot);
10043
10044         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10045
10046         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10047                 skinny_level = (int)key.offset;
10048                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10049         } else {
10050                 struct btrfs_tree_block_info *info;
10051
10052                 info = (struct btrfs_tree_block_info *)(ei + 1);
10053                 skinny_level = btrfs_tree_block_level(leaf, info);
10054                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10055         }
10056
10057         if (eb) {
10058                 u64 header_gen;
10059                 u64 extent_gen;
10060
10061                 if (!(btrfs_extent_flags(leaf, ei) &
10062                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10063                         error(
10064                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10065                                 key.objectid, nodesize,
10066                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10067                         err = BACKREF_MISMATCH;
10068                 }
10069                 header_gen = btrfs_header_generation(eb);
10070                 extent_gen = btrfs_extent_generation(leaf, ei);
10071                 if (header_gen != extent_gen) {
10072                         error(
10073         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10074                                 key.objectid, nodesize, header_gen,
10075                                 extent_gen);
10076                         err = BACKREF_MISMATCH;
10077                 }
10078                 if (level != skinny_level) {
10079                         error(
10080                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10081                                 key.objectid, nodesize, level, skinny_level);
10082                         err = BACKREF_MISMATCH;
10083                 }
10084                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10085                         error(
10086                         "extent[%llu %u] is referred by other roots than %llu",
10087                                 key.objectid, nodesize, root->objectid);
10088                         err = BACKREF_MISMATCH;
10089                 }
10090         }
10091
10092         /*
10093          * Iterate the extent/metadata item to find the exact backref
10094          */
10095         item_size = btrfs_item_size_nr(leaf, slot);
10096         ptr = (unsigned long)iref;
10097         end = (unsigned long)ei + item_size;
10098         while (ptr < end) {
10099                 iref = (struct btrfs_extent_inline_ref *)ptr;
10100                 type = btrfs_extent_inline_ref_type(leaf, iref);
10101                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10102
10103                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10104                         (offset == root->objectid || offset == owner)) {
10105                         found_ref = 1;
10106                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10107                         /*
10108                          * Backref of tree reloc root points to itself, no need
10109                          * to check backref any more.
10110                          */
10111                         if (tree_reloc_root)
10112                                 found_ref = 1;
10113                         else
10114                         /* Check if the backref points to valid referencer */
10115                                 found_ref = !check_tree_block_ref(root, NULL,
10116                                                 offset, level + 1, owner);
10117                 }
10118
10119                 if (found_ref)
10120                         break;
10121                 ptr += btrfs_extent_inline_ref_size(type);
10122         }
10123
10124         /*
10125          * Inlined extent item doesn't have what we need, check
10126          * TREE_BLOCK_REF_KEY
10127          */
10128         if (!found_ref) {
10129                 btrfs_release_path(&path);
10130                 key.objectid = bytenr;
10131                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10132                 key.offset = root->objectid;
10133
10134                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10135                 if (!ret)
10136                         found_ref = 1;
10137         }
10138         if (!found_ref)
10139                 err |= BACKREF_MISSING;
10140 out:
10141         btrfs_release_path(&path);
10142         if (eb && (err & BACKREF_MISSING))
10143                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10144                         bytenr, nodesize, owner, level);
10145         return err;
10146 }
10147
10148 /*
10149  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10150  *
10151  * Return >0 any error found and output error message
10152  * Return 0 for no error found
10153  */
10154 static int check_extent_data_item(struct btrfs_root *root,
10155                                   struct extent_buffer *eb, int slot)
10156 {
10157         struct btrfs_file_extent_item *fi;
10158         struct btrfs_path path;
10159         struct btrfs_root *extent_root = root->fs_info->extent_root;
10160         struct btrfs_key fi_key;
10161         struct btrfs_key dbref_key;
10162         struct extent_buffer *leaf;
10163         struct btrfs_extent_item *ei;
10164         struct btrfs_extent_inline_ref *iref;
10165         struct btrfs_extent_data_ref *dref;
10166         u64 owner;
10167         u64 disk_bytenr;
10168         u64 disk_num_bytes;
10169         u64 extent_num_bytes;
10170         u64 extent_flags;
10171         u32 item_size;
10172         unsigned long end;
10173         unsigned long ptr;
10174         int type;
10175         u64 ref_root;
10176         int found_dbackref = 0;
10177         int err = 0;
10178         int ret;
10179
10180         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10181         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10182
10183         /* Nothing to check for hole and inline data extents */
10184         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10185             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10186                 return 0;
10187
10188         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10189         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10190         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10191
10192         /* Check unaligned disk_num_bytes and num_bytes */
10193         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10194                 error(
10195 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10196                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10197                         root->sectorsize);
10198                 err |= BYTES_UNALIGNED;
10199         } else {
10200                 data_bytes_allocated += disk_num_bytes;
10201         }
10202         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10203                 error(
10204 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10205                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10206                         root->sectorsize);
10207                 err |= BYTES_UNALIGNED;
10208         } else {
10209                 data_bytes_referenced += extent_num_bytes;
10210         }
10211         owner = btrfs_header_owner(eb);
10212
10213         /* Check the extent item of the file extent in extent tree */
10214         btrfs_init_path(&path);
10215         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10216         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10217         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10218
10219         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10220         if (ret) {
10221                 err |= BACKREF_MISSING;
10222                 goto error;
10223         }
10224
10225         leaf = path.nodes[0];
10226         slot = path.slots[0];
10227         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10228
10229         extent_flags = btrfs_extent_flags(leaf, ei);
10230
10231         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10232                 error(
10233                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10234                     disk_bytenr, disk_num_bytes,
10235                     BTRFS_EXTENT_FLAG_DATA);
10236                 err |= BACKREF_MISMATCH;
10237         }
10238
10239         /* Check data backref inside that extent item */
10240         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10241         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10242         ptr = (unsigned long)iref;
10243         end = (unsigned long)ei + item_size;
10244         while (ptr < end) {
10245                 iref = (struct btrfs_extent_inline_ref *)ptr;
10246                 type = btrfs_extent_inline_ref_type(leaf, iref);
10247                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10248
10249                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10250                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10251                         if (ref_root == owner || ref_root == root->objectid)
10252                                 found_dbackref = 1;
10253                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10254                         found_dbackref = !check_tree_block_ref(root, NULL,
10255                                 btrfs_extent_inline_ref_offset(leaf, iref),
10256                                 0, owner);
10257                 }
10258
10259                 if (found_dbackref)
10260                         break;
10261                 ptr += btrfs_extent_inline_ref_size(type);
10262         }
10263
10264         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10265         if (!found_dbackref) {
10266                 btrfs_release_path(&path);
10267
10268                 btrfs_init_path(&path);
10269                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10270                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10271                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10272                                 fi_key.objectid, fi_key.offset);
10273
10274                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10275                                         &dbref_key, &path, 0, 0);
10276                 if (!ret)
10277                         found_dbackref = 1;
10278         }
10279
10280         if (!found_dbackref)
10281                 err |= BACKREF_MISSING;
10282 error:
10283         btrfs_release_path(&path);
10284         if (err & BACKREF_MISSING) {
10285                 error("data extent[%llu %llu] backref lost",
10286                       disk_bytenr, disk_num_bytes);
10287         }
10288         return err;
10289 }
10290
10291 /*
10292  * Get real tree block level for the case like shared block
10293  * Return >= 0 as tree level
10294  * Return <0 for error
10295  */
10296 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10297 {
10298         struct extent_buffer *eb;
10299         struct btrfs_path path;
10300         struct btrfs_key key;
10301         struct btrfs_extent_item *ei;
10302         u64 flags;
10303         u64 transid;
10304         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10305         u8 backref_level;
10306         u8 header_level;
10307         int ret;
10308
10309         /* Search extent tree for extent generation and level */
10310         key.objectid = bytenr;
10311         key.type = BTRFS_METADATA_ITEM_KEY;
10312         key.offset = (u64)-1;
10313
10314         btrfs_init_path(&path);
10315         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10316         if (ret < 0)
10317                 goto release_out;
10318         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10319         if (ret < 0)
10320                 goto release_out;
10321         if (ret > 0) {
10322                 ret = -ENOENT;
10323                 goto release_out;
10324         }
10325
10326         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10327         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10328                             struct btrfs_extent_item);
10329         flags = btrfs_extent_flags(path.nodes[0], ei);
10330         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10331                 ret = -ENOENT;
10332                 goto release_out;
10333         }
10334
10335         /* Get transid for later read_tree_block() check */
10336         transid = btrfs_extent_generation(path.nodes[0], ei);
10337
10338         /* Get backref level as one source */
10339         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10340                 backref_level = key.offset;
10341         } else {
10342                 struct btrfs_tree_block_info *info;
10343
10344                 info = (struct btrfs_tree_block_info *)(ei + 1);
10345                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10346         }
10347         btrfs_release_path(&path);
10348
10349         /* Get level from tree block as an alternative source */
10350         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10351         if (!extent_buffer_uptodate(eb)) {
10352                 free_extent_buffer(eb);
10353                 return -EIO;
10354         }
10355         header_level = btrfs_header_level(eb);
10356         free_extent_buffer(eb);
10357
10358         if (header_level != backref_level)
10359                 return -EIO;
10360         return header_level;
10361
10362 release_out:
10363         btrfs_release_path(&path);
10364         return ret;
10365 }
10366
10367 /*
10368  * Check if a tree block backref is valid (points to a valid tree block)
10369  * if level == -1, level will be resolved
10370  * Return >0 for any error found and print error message
10371  */
10372 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10373                                     u64 bytenr, int level)
10374 {
10375         struct btrfs_root *root;
10376         struct btrfs_key key;
10377         struct btrfs_path path;
10378         struct extent_buffer *eb;
10379         struct extent_buffer *node;
10380         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10381         int err = 0;
10382         int ret;
10383
10384         /* Query level for level == -1 special case */
10385         if (level == -1)
10386                 level = query_tree_block_level(fs_info, bytenr);
10387         if (level < 0) {
10388                 err |= REFERENCER_MISSING;
10389                 goto out;
10390         }
10391
10392         key.objectid = root_id;
10393         key.type = BTRFS_ROOT_ITEM_KEY;
10394         key.offset = (u64)-1;
10395
10396         root = btrfs_read_fs_root(fs_info, &key);
10397         if (IS_ERR(root)) {
10398                 err |= REFERENCER_MISSING;
10399                 goto out;
10400         }
10401
10402         /* Read out the tree block to get item/node key */
10403         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10404         if (!extent_buffer_uptodate(eb)) {
10405                 err |= REFERENCER_MISSING;
10406                 free_extent_buffer(eb);
10407                 goto out;
10408         }
10409
10410         /* Empty tree, no need to check key */
10411         if (!btrfs_header_nritems(eb) && !level) {
10412                 free_extent_buffer(eb);
10413                 goto out;
10414         }
10415
10416         if (level)
10417                 btrfs_node_key_to_cpu(eb, &key, 0);
10418         else
10419                 btrfs_item_key_to_cpu(eb, &key, 0);
10420
10421         free_extent_buffer(eb);
10422
10423         btrfs_init_path(&path);
10424         path.lowest_level = level;
10425         /* Search with the first key, to ensure we can reach it */
10426         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10427         if (ret < 0) {
10428                 err |= REFERENCER_MISSING;
10429                 goto release_out;
10430         }
10431
10432         node = path.nodes[level];
10433         if (btrfs_header_bytenr(node) != bytenr) {
10434                 error(
10435         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10436                         bytenr, nodesize, bytenr,
10437                         btrfs_header_bytenr(node));
10438                 err |= REFERENCER_MISMATCH;
10439         }
10440         if (btrfs_header_level(node) != level) {
10441                 error(
10442         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10443                         bytenr, nodesize, level,
10444                         btrfs_header_level(node));
10445                 err |= REFERENCER_MISMATCH;
10446         }
10447
10448 release_out:
10449         btrfs_release_path(&path);
10450 out:
10451         if (err & REFERENCER_MISSING) {
10452                 if (level < 0)
10453                         error("extent [%llu %d] lost referencer (owner: %llu)",
10454                                 bytenr, nodesize, root_id);
10455                 else
10456                         error(
10457                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10458                                 bytenr, nodesize, root_id, level);
10459         }
10460
10461         return err;
10462 }
10463
10464 /*
10465  * Check if tree block @eb is tree reloc root.
10466  * Return 0 if it's not or any problem happens
10467  * Return 1 if it's a tree reloc root
10468  */
10469 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10470                                  struct extent_buffer *eb)
10471 {
10472         struct btrfs_root *tree_reloc_root;
10473         struct btrfs_key key;
10474         u64 bytenr = btrfs_header_bytenr(eb);
10475         u64 owner = btrfs_header_owner(eb);
10476         int ret = 0;
10477
10478         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10479         key.offset = owner;
10480         key.type = BTRFS_ROOT_ITEM_KEY;
10481
10482         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10483         if (IS_ERR(tree_reloc_root))
10484                 return 0;
10485
10486         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10487                 ret = 1;
10488         btrfs_free_fs_root(tree_reloc_root);
10489         return ret;
10490 }
10491
10492 /*
10493  * Check referencer for shared block backref
10494  * If level == -1, this function will resolve the level.
10495  */
10496 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10497                                      u64 parent, u64 bytenr, int level)
10498 {
10499         struct extent_buffer *eb;
10500         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10501         u32 nr;
10502         int found_parent = 0;
10503         int i;
10504
10505         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10506         if (!extent_buffer_uptodate(eb))
10507                 goto out;
10508
10509         if (level == -1)
10510                 level = query_tree_block_level(fs_info, bytenr);
10511         if (level < 0)
10512                 goto out;
10513
10514         /* It's possible it's a tree reloc root */
10515         if (parent == bytenr) {
10516                 if (is_tree_reloc_root(fs_info, eb))
10517                         found_parent = 1;
10518                 goto out;
10519         }
10520
10521         if (level + 1 != btrfs_header_level(eb))
10522                 goto out;
10523
10524         nr = btrfs_header_nritems(eb);
10525         for (i = 0; i < nr; i++) {
10526                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10527                         found_parent = 1;
10528                         break;
10529                 }
10530         }
10531 out:
10532         free_extent_buffer(eb);
10533         if (!found_parent) {
10534                 error(
10535         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10536                         bytenr, nodesize, parent, level);
10537                 return REFERENCER_MISSING;
10538         }
10539         return 0;
10540 }
10541
10542 /*
10543  * Check referencer for normal (inlined) data ref
10544  * If len == 0, it will be resolved by searching in extent tree
10545  */
10546 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10547                                      u64 root_id, u64 objectid, u64 offset,
10548                                      u64 bytenr, u64 len, u32 count)
10549 {
10550         struct btrfs_root *root;
10551         struct btrfs_root *extent_root = fs_info->extent_root;
10552         struct btrfs_key key;
10553         struct btrfs_path path;
10554         struct extent_buffer *leaf;
10555         struct btrfs_file_extent_item *fi;
10556         u32 found_count = 0;
10557         int slot;
10558         int ret = 0;
10559
10560         if (!len) {
10561                 key.objectid = bytenr;
10562                 key.type = BTRFS_EXTENT_ITEM_KEY;
10563                 key.offset = (u64)-1;
10564
10565                 btrfs_init_path(&path);
10566                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10567                 if (ret < 0)
10568                         goto out;
10569                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10570                 if (ret)
10571                         goto out;
10572                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10573                 if (key.objectid != bytenr ||
10574                     key.type != BTRFS_EXTENT_ITEM_KEY)
10575                         goto out;
10576                 len = key.offset;
10577                 btrfs_release_path(&path);
10578         }
10579         key.objectid = root_id;
10580         key.type = BTRFS_ROOT_ITEM_KEY;
10581         key.offset = (u64)-1;
10582         btrfs_init_path(&path);
10583
10584         root = btrfs_read_fs_root(fs_info, &key);
10585         if (IS_ERR(root))
10586                 goto out;
10587
10588         key.objectid = objectid;
10589         key.type = BTRFS_EXTENT_DATA_KEY;
10590         /*
10591          * It can be nasty as data backref offset is
10592          * file offset - file extent offset, which is smaller or
10593          * equal to original backref offset.  The only special case is
10594          * overflow.  So we need to special check and do further search.
10595          */
10596         key.offset = offset & (1ULL << 63) ? 0 : offset;
10597
10598         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10599         if (ret < 0)
10600                 goto out;
10601
10602         /*
10603          * Search afterwards to get correct one
10604          * NOTE: As we must do a comprehensive check on the data backref to
10605          * make sure the dref count also matches, we must iterate all file
10606          * extents for that inode.
10607          */
10608         while (1) {
10609                 leaf = path.nodes[0];
10610                 slot = path.slots[0];
10611
10612                 if (slot >= btrfs_header_nritems(leaf))
10613                         goto next;
10614                 btrfs_item_key_to_cpu(leaf, &key, slot);
10615                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10616                         break;
10617                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10618                 /*
10619                  * Except normal disk bytenr and disk num bytes, we still
10620                  * need to do extra check on dbackref offset as
10621                  * dbackref offset = file_offset - file_extent_offset
10622                  */
10623                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10624                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10625                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10626                     offset)
10627                         found_count++;
10628
10629 next:
10630                 ret = btrfs_next_item(root, &path);
10631                 if (ret)
10632                         break;
10633         }
10634 out:
10635         btrfs_release_path(&path);
10636         if (found_count != count) {
10637                 error(
10638 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10639                         bytenr, len, root_id, objectid, offset, count, found_count);
10640                 return REFERENCER_MISSING;
10641         }
10642         return 0;
10643 }
10644
10645 /*
10646  * Check if the referencer of a shared data backref exists
10647  */
10648 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10649                                      u64 parent, u64 bytenr)
10650 {
10651         struct extent_buffer *eb;
10652         struct btrfs_key key;
10653         struct btrfs_file_extent_item *fi;
10654         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10655         u32 nr;
10656         int found_parent = 0;
10657         int i;
10658
10659         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10660         if (!extent_buffer_uptodate(eb))
10661                 goto out;
10662
10663         nr = btrfs_header_nritems(eb);
10664         for (i = 0; i < nr; i++) {
10665                 btrfs_item_key_to_cpu(eb, &key, i);
10666                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10667                         continue;
10668
10669                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10670                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10671                         continue;
10672
10673                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10674                         found_parent = 1;
10675                         break;
10676                 }
10677         }
10678
10679 out:
10680         free_extent_buffer(eb);
10681         if (!found_parent) {
10682                 error("shared extent %llu referencer lost (parent: %llu)",
10683                         bytenr, parent);
10684                 return REFERENCER_MISSING;
10685         }
10686         return 0;
10687 }
10688
10689 /*
10690  * This function will check a given extent item, including its backref and
10691  * itself (like crossing stripe boundary and type)
10692  *
10693  * Since we don't use extent_record anymore, introduce new error bit
10694  */
10695 static int check_extent_item(struct btrfs_fs_info *fs_info,
10696                              struct extent_buffer *eb, int slot)
10697 {
10698         struct btrfs_extent_item *ei;
10699         struct btrfs_extent_inline_ref *iref;
10700         struct btrfs_extent_data_ref *dref;
10701         unsigned long end;
10702         unsigned long ptr;
10703         int type;
10704         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10705         u32 item_size = btrfs_item_size_nr(eb, slot);
10706         u64 flags;
10707         u64 offset;
10708         int metadata = 0;
10709         int level;
10710         struct btrfs_key key;
10711         int ret;
10712         int err = 0;
10713
10714         btrfs_item_key_to_cpu(eb, &key, slot);
10715         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10716                 bytes_used += key.offset;
10717         else
10718                 bytes_used += nodesize;
10719
10720         if (item_size < sizeof(*ei)) {
10721                 /*
10722                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10723                  * old thing when on disk format is still un-determined.
10724                  * No need to care about it anymore
10725                  */
10726                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10727                 return -ENOTTY;
10728         }
10729
10730         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10731         flags = btrfs_extent_flags(eb, ei);
10732
10733         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10734                 metadata = 1;
10735         if (metadata && check_crossing_stripes(global_info, key.objectid,
10736                                                eb->len)) {
10737                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10738                       key.objectid, key.objectid + nodesize);
10739                 err |= CROSSING_STRIPE_BOUNDARY;
10740         }
10741
10742         ptr = (unsigned long)(ei + 1);
10743
10744         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10745                 /* Old EXTENT_ITEM metadata */
10746                 struct btrfs_tree_block_info *info;
10747
10748                 info = (struct btrfs_tree_block_info *)ptr;
10749                 level = btrfs_tree_block_level(eb, info);
10750                 ptr += sizeof(struct btrfs_tree_block_info);
10751         } else {
10752                 /* New METADATA_ITEM */
10753                 level = key.offset;
10754         }
10755         end = (unsigned long)ei + item_size;
10756
10757 next:
10758         /* Reached extent item end normally */
10759         if (ptr == end)
10760                 goto out;
10761
10762         /* Beyond extent item end, wrong item size */
10763         if (ptr > end) {
10764                 err |= ITEM_SIZE_MISMATCH;
10765                 error("extent item at bytenr %llu slot %d has wrong size",
10766                         eb->start, slot);
10767                 goto out;
10768         }
10769
10770         /* Now check every backref in this extent item */
10771         iref = (struct btrfs_extent_inline_ref *)ptr;
10772         type = btrfs_extent_inline_ref_type(eb, iref);
10773         offset = btrfs_extent_inline_ref_offset(eb, iref);
10774         switch (type) {
10775         case BTRFS_TREE_BLOCK_REF_KEY:
10776                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10777                                                level);
10778                 err |= ret;
10779                 break;
10780         case BTRFS_SHARED_BLOCK_REF_KEY:
10781                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10782                                                  level);
10783                 err |= ret;
10784                 break;
10785         case BTRFS_EXTENT_DATA_REF_KEY:
10786                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10787                 ret = check_extent_data_backref(fs_info,
10788                                 btrfs_extent_data_ref_root(eb, dref),
10789                                 btrfs_extent_data_ref_objectid(eb, dref),
10790                                 btrfs_extent_data_ref_offset(eb, dref),
10791                                 key.objectid, key.offset,
10792                                 btrfs_extent_data_ref_count(eb, dref));
10793                 err |= ret;
10794                 break;
10795         case BTRFS_SHARED_DATA_REF_KEY:
10796                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10797                 err |= ret;
10798                 break;
10799         default:
10800                 error("extent[%llu %d %llu] has unknown ref type: %d",
10801                         key.objectid, key.type, key.offset, type);
10802                 err |= UNKNOWN_TYPE;
10803                 goto out;
10804         }
10805
10806         ptr += btrfs_extent_inline_ref_size(type);
10807         goto next;
10808
10809 out:
10810         return err;
10811 }
10812
10813 /*
10814  * Check if a dev extent item is referred correctly by its chunk
10815  */
10816 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10817                                  struct extent_buffer *eb, int slot)
10818 {
10819         struct btrfs_root *chunk_root = fs_info->chunk_root;
10820         struct btrfs_dev_extent *ptr;
10821         struct btrfs_path path;
10822         struct btrfs_key chunk_key;
10823         struct btrfs_key devext_key;
10824         struct btrfs_chunk *chunk;
10825         struct extent_buffer *l;
10826         int num_stripes;
10827         u64 length;
10828         int i;
10829         int found_chunk = 0;
10830         int ret;
10831
10832         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10833         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10834         length = btrfs_dev_extent_length(eb, ptr);
10835
10836         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10837         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10838         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10839
10840         btrfs_init_path(&path);
10841         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10842         if (ret)
10843                 goto out;
10844
10845         l = path.nodes[0];
10846         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10847         if (btrfs_chunk_length(l, chunk) != length)
10848                 goto out;
10849
10850         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10851         for (i = 0; i < num_stripes; i++) {
10852                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10853                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10854
10855                 if (devid == devext_key.objectid &&
10856                     offset == devext_key.offset) {
10857                         found_chunk = 1;
10858                         break;
10859                 }
10860         }
10861 out:
10862         btrfs_release_path(&path);
10863         if (!found_chunk) {
10864                 error(
10865                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10866                         devext_key.objectid, devext_key.offset, length);
10867                 return REFERENCER_MISSING;
10868         }
10869         return 0;
10870 }
10871
10872 /*
10873  * Check if the used space is correct with the dev item
10874  */
10875 static int check_dev_item(struct btrfs_fs_info *fs_info,
10876                           struct extent_buffer *eb, int slot)
10877 {
10878         struct btrfs_root *dev_root = fs_info->dev_root;
10879         struct btrfs_dev_item *dev_item;
10880         struct btrfs_path path;
10881         struct btrfs_key key;
10882         struct btrfs_dev_extent *ptr;
10883         u64 dev_id;
10884         u64 used;
10885         u64 total = 0;
10886         int ret;
10887
10888         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10889         dev_id = btrfs_device_id(eb, dev_item);
10890         used = btrfs_device_bytes_used(eb, dev_item);
10891
10892         key.objectid = dev_id;
10893         key.type = BTRFS_DEV_EXTENT_KEY;
10894         key.offset = 0;
10895
10896         btrfs_init_path(&path);
10897         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10898         if (ret < 0) {
10899                 btrfs_item_key_to_cpu(eb, &key, slot);
10900                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10901                         key.objectid, key.type, key.offset);
10902                 btrfs_release_path(&path);
10903                 return REFERENCER_MISSING;
10904         }
10905
10906         /* Iterate dev_extents to calculate the used space of a device */
10907         while (1) {
10908                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10909                         goto next;
10910
10911                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10912                 if (key.objectid > dev_id)
10913                         break;
10914                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10915                         goto next;
10916
10917                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10918                                      struct btrfs_dev_extent);
10919                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10920 next:
10921                 ret = btrfs_next_item(dev_root, &path);
10922                 if (ret)
10923                         break;
10924         }
10925         btrfs_release_path(&path);
10926
10927         if (used != total) {
10928                 btrfs_item_key_to_cpu(eb, &key, slot);
10929                 error(
10930 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10931                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10932                         BTRFS_DEV_EXTENT_KEY, dev_id);
10933                 return ACCOUNTING_MISMATCH;
10934         }
10935         return 0;
10936 }
10937
10938 /*
10939  * Check a block group item with its referener (chunk) and its used space
10940  * with extent/metadata item
10941  */
10942 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10943                                   struct extent_buffer *eb, int slot)
10944 {
10945         struct btrfs_root *extent_root = fs_info->extent_root;
10946         struct btrfs_root *chunk_root = fs_info->chunk_root;
10947         struct btrfs_block_group_item *bi;
10948         struct btrfs_block_group_item bg_item;
10949         struct btrfs_path path;
10950         struct btrfs_key bg_key;
10951         struct btrfs_key chunk_key;
10952         struct btrfs_key extent_key;
10953         struct btrfs_chunk *chunk;
10954         struct extent_buffer *leaf;
10955         struct btrfs_extent_item *ei;
10956         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10957         u64 flags;
10958         u64 bg_flags;
10959         u64 used;
10960         u64 total = 0;
10961         int ret;
10962         int err = 0;
10963
10964         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10965         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10966         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10967         used = btrfs_block_group_used(&bg_item);
10968         bg_flags = btrfs_block_group_flags(&bg_item);
10969
10970         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10971         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10972         chunk_key.offset = bg_key.objectid;
10973
10974         btrfs_init_path(&path);
10975         /* Search for the referencer chunk */
10976         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10977         if (ret) {
10978                 error(
10979                 "block group[%llu %llu] did not find the related chunk item",
10980                         bg_key.objectid, bg_key.offset);
10981                 err |= REFERENCER_MISSING;
10982         } else {
10983                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10984                                         struct btrfs_chunk);
10985                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10986                                                 bg_key.offset) {
10987                         error(
10988         "block group[%llu %llu] related chunk item length does not match",
10989                                 bg_key.objectid, bg_key.offset);
10990                         err |= REFERENCER_MISMATCH;
10991                 }
10992         }
10993         btrfs_release_path(&path);
10994
10995         /* Search from the block group bytenr */
10996         extent_key.objectid = bg_key.objectid;
10997         extent_key.type = 0;
10998         extent_key.offset = 0;
10999
11000         btrfs_init_path(&path);
11001         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11002         if (ret < 0)
11003                 goto out;
11004
11005         /* Iterate extent tree to account used space */
11006         while (1) {
11007                 leaf = path.nodes[0];
11008
11009                 /* Search slot can point to the last item beyond leaf nritems */
11010                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11011                         goto next;
11012
11013                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11014                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11015                         break;
11016
11017                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11018                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11019                         goto next;
11020                 if (extent_key.objectid < bg_key.objectid)
11021                         goto next;
11022
11023                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11024                         total += nodesize;
11025                 else
11026                         total += extent_key.offset;
11027
11028                 ei = btrfs_item_ptr(leaf, path.slots[0],
11029                                     struct btrfs_extent_item);
11030                 flags = btrfs_extent_flags(leaf, ei);
11031                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11032                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11033                                 error(
11034                         "bad extent[%llu, %llu) type mismatch with chunk",
11035                                         extent_key.objectid,
11036                                         extent_key.objectid + extent_key.offset);
11037                                 err |= CHUNK_TYPE_MISMATCH;
11038                         }
11039                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11040                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11041                                     BTRFS_BLOCK_GROUP_METADATA))) {
11042                                 error(
11043                         "bad extent[%llu, %llu) type mismatch with chunk",
11044                                         extent_key.objectid,
11045                                         extent_key.objectid + nodesize);
11046                                 err |= CHUNK_TYPE_MISMATCH;
11047                         }
11048                 }
11049 next:
11050                 ret = btrfs_next_item(extent_root, &path);
11051                 if (ret)
11052                         break;
11053         }
11054
11055 out:
11056         btrfs_release_path(&path);
11057
11058         if (total != used) {
11059                 error(
11060                 "block group[%llu %llu] used %llu but extent items used %llu",
11061                         bg_key.objectid, bg_key.offset, used, total);
11062                 err |= ACCOUNTING_MISMATCH;
11063         }
11064         return err;
11065 }
11066
11067 /*
11068  * Check a chunk item.
11069  * Including checking all referred dev_extents and block group
11070  */
11071 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11072                             struct extent_buffer *eb, int slot)
11073 {
11074         struct btrfs_root *extent_root = fs_info->extent_root;
11075         struct btrfs_root *dev_root = fs_info->dev_root;
11076         struct btrfs_path path;
11077         struct btrfs_key chunk_key;
11078         struct btrfs_key bg_key;
11079         struct btrfs_key devext_key;
11080         struct btrfs_chunk *chunk;
11081         struct extent_buffer *leaf;
11082         struct btrfs_block_group_item *bi;
11083         struct btrfs_block_group_item bg_item;
11084         struct btrfs_dev_extent *ptr;
11085         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11086         u64 length;
11087         u64 chunk_end;
11088         u64 type;
11089         u64 profile;
11090         int num_stripes;
11091         u64 offset;
11092         u64 objectid;
11093         int i;
11094         int ret;
11095         int err = 0;
11096
11097         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11098         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11099         length = btrfs_chunk_length(eb, chunk);
11100         chunk_end = chunk_key.offset + length;
11101         if (!IS_ALIGNED(length, sectorsize)) {
11102                 error("chunk[%llu %llu) not aligned to %u",
11103                         chunk_key.offset, chunk_end, sectorsize);
11104                 err |= BYTES_UNALIGNED;
11105                 goto out;
11106         }
11107
11108         type = btrfs_chunk_type(eb, chunk);
11109         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11110         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11111                 error("chunk[%llu %llu) has no chunk type",
11112                         chunk_key.offset, chunk_end);
11113                 err |= UNKNOWN_TYPE;
11114         }
11115         if (profile && (profile & (profile - 1))) {
11116                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11117                         chunk_key.offset, chunk_end, profile);
11118                 err |= UNKNOWN_TYPE;
11119         }
11120
11121         bg_key.objectid = chunk_key.offset;
11122         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11123         bg_key.offset = length;
11124
11125         btrfs_init_path(&path);
11126         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11127         if (ret) {
11128                 error(
11129                 "chunk[%llu %llu) did not find the related block group item",
11130                         chunk_key.offset, chunk_end);
11131                 err |= REFERENCER_MISSING;
11132         } else{
11133                 leaf = path.nodes[0];
11134                 bi = btrfs_item_ptr(leaf, path.slots[0],
11135                                     struct btrfs_block_group_item);
11136                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11137                                    sizeof(bg_item));
11138                 if (btrfs_block_group_flags(&bg_item) != type) {
11139                         error(
11140 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11141                                 chunk_key.offset, chunk_end, type,
11142                                 btrfs_block_group_flags(&bg_item));
11143                         err |= REFERENCER_MISSING;
11144                 }
11145         }
11146
11147         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11148         for (i = 0; i < num_stripes; i++) {
11149                 btrfs_release_path(&path);
11150                 btrfs_init_path(&path);
11151                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11152                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11153                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11154
11155                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11156                                         0, 0);
11157                 if (ret)
11158                         goto not_match_dev;
11159
11160                 leaf = path.nodes[0];
11161                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11162                                      struct btrfs_dev_extent);
11163                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11164                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11165                 if (objectid != chunk_key.objectid ||
11166                     offset != chunk_key.offset ||
11167                     btrfs_dev_extent_length(leaf, ptr) != length)
11168                         goto not_match_dev;
11169                 continue;
11170 not_match_dev:
11171                 err |= BACKREF_MISSING;
11172                 error(
11173                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11174                         chunk_key.objectid, chunk_end, i);
11175                 continue;
11176         }
11177         btrfs_release_path(&path);
11178 out:
11179         return err;
11180 }
11181
11182 /*
11183  * Main entry function to check known items and update related accounting info
11184  */
11185 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11186 {
11187         struct btrfs_fs_info *fs_info = root->fs_info;
11188         struct btrfs_key key;
11189         int slot = 0;
11190         int type;
11191         struct btrfs_extent_data_ref *dref;
11192         int ret;
11193         int err = 0;
11194
11195 next:
11196         btrfs_item_key_to_cpu(eb, &key, slot);
11197         type = key.type;
11198
11199         switch (type) {
11200         case BTRFS_EXTENT_DATA_KEY:
11201                 ret = check_extent_data_item(root, eb, slot);
11202                 err |= ret;
11203                 break;
11204         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11205                 ret = check_block_group_item(fs_info, eb, slot);
11206                 err |= ret;
11207                 break;
11208         case BTRFS_DEV_ITEM_KEY:
11209                 ret = check_dev_item(fs_info, eb, slot);
11210                 err |= ret;
11211                 break;
11212         case BTRFS_CHUNK_ITEM_KEY:
11213                 ret = check_chunk_item(fs_info, eb, slot);
11214                 err |= ret;
11215                 break;
11216         case BTRFS_DEV_EXTENT_KEY:
11217                 ret = check_dev_extent_item(fs_info, eb, slot);
11218                 err |= ret;
11219                 break;
11220         case BTRFS_EXTENT_ITEM_KEY:
11221         case BTRFS_METADATA_ITEM_KEY:
11222                 ret = check_extent_item(fs_info, eb, slot);
11223                 err |= ret;
11224                 break;
11225         case BTRFS_EXTENT_CSUM_KEY:
11226                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11227                 break;
11228         case BTRFS_TREE_BLOCK_REF_KEY:
11229                 ret = check_tree_block_backref(fs_info, key.offset,
11230                                                key.objectid, -1);
11231                 err |= ret;
11232                 break;
11233         case BTRFS_EXTENT_DATA_REF_KEY:
11234                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11235                 ret = check_extent_data_backref(fs_info,
11236                                 btrfs_extent_data_ref_root(eb, dref),
11237                                 btrfs_extent_data_ref_objectid(eb, dref),
11238                                 btrfs_extent_data_ref_offset(eb, dref),
11239                                 key.objectid, 0,
11240                                 btrfs_extent_data_ref_count(eb, dref));
11241                 err |= ret;
11242                 break;
11243         case BTRFS_SHARED_BLOCK_REF_KEY:
11244                 ret = check_shared_block_backref(fs_info, key.offset,
11245                                                  key.objectid, -1);
11246                 err |= ret;
11247                 break;
11248         case BTRFS_SHARED_DATA_REF_KEY:
11249                 ret = check_shared_data_backref(fs_info, key.offset,
11250                                                 key.objectid);
11251                 err |= ret;
11252                 break;
11253         default:
11254                 break;
11255         }
11256
11257         if (++slot < btrfs_header_nritems(eb))
11258                 goto next;
11259
11260         return err;
11261 }
11262
11263 /*
11264  * Helper function for later fs/subvol tree check.  To determine if a tree
11265  * block should be checked.
11266  * This function will ensure only the direct referencer with lowest rootid to
11267  * check a fs/subvolume tree block.
11268  *
11269  * Backref check at extent tree would detect errors like missing subvolume
11270  * tree, so we can do aggressive check to reduce duplicated checks.
11271  */
11272 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11273 {
11274         struct btrfs_root *extent_root = root->fs_info->extent_root;
11275         struct btrfs_key key;
11276         struct btrfs_path path;
11277         struct extent_buffer *leaf;
11278         int slot;
11279         struct btrfs_extent_item *ei;
11280         unsigned long ptr;
11281         unsigned long end;
11282         int type;
11283         u32 item_size;
11284         u64 offset;
11285         struct btrfs_extent_inline_ref *iref;
11286         int ret;
11287
11288         btrfs_init_path(&path);
11289         key.objectid = btrfs_header_bytenr(eb);
11290         key.type = BTRFS_METADATA_ITEM_KEY;
11291         key.offset = (u64)-1;
11292
11293         /*
11294          * Any failure in backref resolving means we can't determine
11295          * whom the tree block belongs to.
11296          * So in that case, we need to check that tree block
11297          */
11298         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11299         if (ret < 0)
11300                 goto need_check;
11301
11302         ret = btrfs_previous_extent_item(extent_root, &path,
11303                                          btrfs_header_bytenr(eb));
11304         if (ret)
11305                 goto need_check;
11306
11307         leaf = path.nodes[0];
11308         slot = path.slots[0];
11309         btrfs_item_key_to_cpu(leaf, &key, slot);
11310         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11311
11312         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11313                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11314         } else {
11315                 struct btrfs_tree_block_info *info;
11316
11317                 info = (struct btrfs_tree_block_info *)(ei + 1);
11318                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11319         }
11320
11321         item_size = btrfs_item_size_nr(leaf, slot);
11322         ptr = (unsigned long)iref;
11323         end = (unsigned long)ei + item_size;
11324         while (ptr < end) {
11325                 iref = (struct btrfs_extent_inline_ref *)ptr;
11326                 type = btrfs_extent_inline_ref_type(leaf, iref);
11327                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11328
11329                 /*
11330                  * We only check the tree block if current root is
11331                  * the lowest referencer of it.
11332                  */
11333                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11334                     offset < root->objectid) {
11335                         btrfs_release_path(&path);
11336                         return 0;
11337                 }
11338
11339                 ptr += btrfs_extent_inline_ref_size(type);
11340         }
11341         /*
11342          * Normally we should also check keyed tree block ref, but that may be
11343          * very time consuming.  Inlined ref should already make us skip a lot
11344          * of refs now.  So skip search keyed tree block ref.
11345          */
11346
11347 need_check:
11348         btrfs_release_path(&path);
11349         return 1;
11350 }
11351
11352 /*
11353  * Traversal function for tree block. We will do:
11354  * 1) Skip shared fs/subvolume tree blocks
11355  * 2) Update related bytes accounting
11356  * 3) Pre-order traversal
11357  */
11358 static int traverse_tree_block(struct btrfs_root *root,
11359                                 struct extent_buffer *node)
11360 {
11361         struct extent_buffer *eb;
11362         struct btrfs_key key;
11363         struct btrfs_key drop_key;
11364         int level;
11365         u64 nr;
11366         int i;
11367         int err = 0;
11368         int ret;
11369
11370         /*
11371          * Skip shared fs/subvolume tree block, in that case they will
11372          * be checked by referencer with lowest rootid
11373          */
11374         if (is_fstree(root->objectid) && !should_check(root, node))
11375                 return 0;
11376
11377         /* Update bytes accounting */
11378         total_btree_bytes += node->len;
11379         if (fs_root_objectid(btrfs_header_owner(node)))
11380                 total_fs_tree_bytes += node->len;
11381         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11382                 total_extent_tree_bytes += node->len;
11383         if (!found_old_backref &&
11384             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11385             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11386             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11387                 found_old_backref = 1;
11388
11389         /* pre-order tranversal, check itself first */
11390         level = btrfs_header_level(node);
11391         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11392                                    btrfs_header_level(node),
11393                                    btrfs_header_owner(node));
11394         err |= ret;
11395         if (err)
11396                 error(
11397         "check %s failed root %llu bytenr %llu level %d, force continue check",
11398                         level ? "node":"leaf", root->objectid,
11399                         btrfs_header_bytenr(node), btrfs_header_level(node));
11400
11401         if (!level) {
11402                 btree_space_waste += btrfs_leaf_free_space(root, node);
11403                 ret = check_leaf_items(root, node);
11404                 err |= ret;
11405                 return err;
11406         }
11407
11408         nr = btrfs_header_nritems(node);
11409         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11410         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11411                 sizeof(struct btrfs_key_ptr);
11412
11413         /* Then check all its children */
11414         for (i = 0; i < nr; i++) {
11415                 u64 blocknr = btrfs_node_blockptr(node, i);
11416
11417                 btrfs_node_key_to_cpu(node, &key, i);
11418                 if (level == root->root_item.drop_level &&
11419                     is_dropped_key(&key, &drop_key))
11420                         continue;
11421
11422                 /*
11423                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11424                  * to call the function itself.
11425                  */
11426                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11427                 if (extent_buffer_uptodate(eb)) {
11428                         ret = traverse_tree_block(root, eb);
11429                         err |= ret;
11430                 }
11431                 free_extent_buffer(eb);
11432         }
11433
11434         return err;
11435 }
11436
11437 /*
11438  * Low memory usage version check_chunks_and_extents.
11439  */
11440 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11441 {
11442         struct btrfs_path path;
11443         struct btrfs_key key;
11444         struct btrfs_root *root1;
11445         struct btrfs_root *cur_root;
11446         int err = 0;
11447         int ret;
11448
11449         root1 = root->fs_info->chunk_root;
11450         ret = traverse_tree_block(root1, root1->node);
11451         err |= ret;
11452
11453         root1 = root->fs_info->tree_root;
11454         ret = traverse_tree_block(root1, root1->node);
11455         err |= ret;
11456
11457         btrfs_init_path(&path);
11458         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11459         key.offset = 0;
11460         key.type = BTRFS_ROOT_ITEM_KEY;
11461
11462         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11463         if (ret) {
11464                 error("cannot find extent treet in tree_root");
11465                 goto out;
11466         }
11467
11468         while (1) {
11469                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11470                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11471                         goto next;
11472                 key.offset = (u64)-1;
11473
11474                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11475                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11476                                         &key);
11477                 else
11478                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11479                 if (IS_ERR(cur_root) || !cur_root) {
11480                         error("failed to read tree: %lld", key.objectid);
11481                         goto next;
11482                 }
11483
11484                 ret = traverse_tree_block(cur_root, cur_root->node);
11485                 err |= ret;
11486
11487                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11488                         btrfs_free_fs_root(cur_root);
11489 next:
11490                 ret = btrfs_next_item(root1, &path);
11491                 if (ret)
11492                         goto out;
11493         }
11494
11495 out:
11496         btrfs_release_path(&path);
11497         return err;
11498 }
11499
11500 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11501                            struct btrfs_root *root, int overwrite)
11502 {
11503         struct extent_buffer *c;
11504         struct extent_buffer *old = root->node;
11505         int level;
11506         int ret;
11507         struct btrfs_disk_key disk_key = {0,0,0};
11508
11509         level = 0;
11510
11511         if (overwrite) {
11512                 c = old;
11513                 extent_buffer_get(c);
11514                 goto init;
11515         }
11516         c = btrfs_alloc_free_block(trans, root,
11517                                    root->nodesize,
11518                                    root->root_key.objectid,
11519                                    &disk_key, level, 0, 0);
11520         if (IS_ERR(c)) {
11521                 c = old;
11522                 extent_buffer_get(c);
11523                 overwrite = 1;
11524         }
11525 init:
11526         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11527         btrfs_set_header_level(c, level);
11528         btrfs_set_header_bytenr(c, c->start);
11529         btrfs_set_header_generation(c, trans->transid);
11530         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11531         btrfs_set_header_owner(c, root->root_key.objectid);
11532
11533         write_extent_buffer(c, root->fs_info->fsid,
11534                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11535
11536         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11537                             btrfs_header_chunk_tree_uuid(c),
11538                             BTRFS_UUID_SIZE);
11539
11540         btrfs_mark_buffer_dirty(c);
11541         /*
11542          * this case can happen in the following case:
11543          *
11544          * 1.overwrite previous root.
11545          *
11546          * 2.reinit reloc data root, this is because we skip pin
11547          * down reloc data tree before which means we can allocate
11548          * same block bytenr here.
11549          */
11550         if (old->start == c->start) {
11551                 btrfs_set_root_generation(&root->root_item,
11552                                           trans->transid);
11553                 root->root_item.level = btrfs_header_level(root->node);
11554                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11555                                         &root->root_key, &root->root_item);
11556                 if (ret) {
11557                         free_extent_buffer(c);
11558                         return ret;
11559                 }
11560         }
11561         free_extent_buffer(old);
11562         root->node = c;
11563         add_root_to_dirty_list(root);
11564         return 0;
11565 }
11566
11567 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11568                                 struct extent_buffer *eb, int tree_root)
11569 {
11570         struct extent_buffer *tmp;
11571         struct btrfs_root_item *ri;
11572         struct btrfs_key key;
11573         u64 bytenr;
11574         u32 nodesize;
11575         int level = btrfs_header_level(eb);
11576         int nritems;
11577         int ret;
11578         int i;
11579
11580         /*
11581          * If we have pinned this block before, don't pin it again.
11582          * This can not only avoid forever loop with broken filesystem
11583          * but also give us some speedups.
11584          */
11585         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11586                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11587                 return 0;
11588
11589         btrfs_pin_extent(fs_info, eb->start, eb->len);
11590
11591         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11592         nritems = btrfs_header_nritems(eb);
11593         for (i = 0; i < nritems; i++) {
11594                 if (level == 0) {
11595                         btrfs_item_key_to_cpu(eb, &key, i);
11596                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11597                                 continue;
11598                         /* Skip the extent root and reloc roots */
11599                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11600                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11601                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11602                                 continue;
11603                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11604                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11605
11606                         /*
11607                          * If at any point we start needing the real root we
11608                          * will have to build a stump root for the root we are
11609                          * in, but for now this doesn't actually use the root so
11610                          * just pass in extent_root.
11611                          */
11612                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11613                                               nodesize, 0);
11614                         if (!extent_buffer_uptodate(tmp)) {
11615                                 fprintf(stderr, "Error reading root block\n");
11616                                 return -EIO;
11617                         }
11618                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11619                         free_extent_buffer(tmp);
11620                         if (ret)
11621                                 return ret;
11622                 } else {
11623                         bytenr = btrfs_node_blockptr(eb, i);
11624
11625                         /* If we aren't the tree root don't read the block */
11626                         if (level == 1 && !tree_root) {
11627                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11628                                 continue;
11629                         }
11630
11631                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11632                                               nodesize, 0);
11633                         if (!extent_buffer_uptodate(tmp)) {
11634                                 fprintf(stderr, "Error reading tree block\n");
11635                                 return -EIO;
11636                         }
11637                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11638                         free_extent_buffer(tmp);
11639                         if (ret)
11640                                 return ret;
11641                 }
11642         }
11643
11644         return 0;
11645 }
11646
11647 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11648 {
11649         int ret;
11650
11651         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11652         if (ret)
11653                 return ret;
11654
11655         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11656 }
11657
11658 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11659 {
11660         struct btrfs_block_group_cache *cache;
11661         struct btrfs_path path;
11662         struct extent_buffer *leaf;
11663         struct btrfs_chunk *chunk;
11664         struct btrfs_key key;
11665         int ret;
11666         u64 start;
11667
11668         btrfs_init_path(&path);
11669         key.objectid = 0;
11670         key.type = BTRFS_CHUNK_ITEM_KEY;
11671         key.offset = 0;
11672         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11673         if (ret < 0) {
11674                 btrfs_release_path(&path);
11675                 return ret;
11676         }
11677
11678         /*
11679          * We do this in case the block groups were screwed up and had alloc
11680          * bits that aren't actually set on the chunks.  This happens with
11681          * restored images every time and could happen in real life I guess.
11682          */
11683         fs_info->avail_data_alloc_bits = 0;
11684         fs_info->avail_metadata_alloc_bits = 0;
11685         fs_info->avail_system_alloc_bits = 0;
11686
11687         /* First we need to create the in-memory block groups */
11688         while (1) {
11689                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11690                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11691                         if (ret < 0) {
11692                                 btrfs_release_path(&path);
11693                                 return ret;
11694                         }
11695                         if (ret) {
11696                                 ret = 0;
11697                                 break;
11698                         }
11699                 }
11700                 leaf = path.nodes[0];
11701                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11702                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11703                         path.slots[0]++;
11704                         continue;
11705                 }
11706
11707                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11708                 btrfs_add_block_group(fs_info, 0,
11709                                       btrfs_chunk_type(leaf, chunk),
11710                                       key.objectid, key.offset,
11711                                       btrfs_chunk_length(leaf, chunk));
11712                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11713                                  key.offset + btrfs_chunk_length(leaf, chunk));
11714                 path.slots[0]++;
11715         }
11716         start = 0;
11717         while (1) {
11718                 cache = btrfs_lookup_first_block_group(fs_info, start);
11719                 if (!cache)
11720                         break;
11721                 cache->cached = 1;
11722                 start = cache->key.objectid + cache->key.offset;
11723         }
11724
11725         btrfs_release_path(&path);
11726         return 0;
11727 }
11728
11729 static int reset_balance(struct btrfs_trans_handle *trans,
11730                          struct btrfs_fs_info *fs_info)
11731 {
11732         struct btrfs_root *root = fs_info->tree_root;
11733         struct btrfs_path path;
11734         struct extent_buffer *leaf;
11735         struct btrfs_key key;
11736         int del_slot, del_nr = 0;
11737         int ret;
11738         int found = 0;
11739
11740         btrfs_init_path(&path);
11741         key.objectid = BTRFS_BALANCE_OBJECTID;
11742         key.type = BTRFS_BALANCE_ITEM_KEY;
11743         key.offset = 0;
11744         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11745         if (ret) {
11746                 if (ret > 0)
11747                         ret = 0;
11748                 if (!ret)
11749                         goto reinit_data_reloc;
11750                 else
11751                         goto out;
11752         }
11753
11754         ret = btrfs_del_item(trans, root, &path);
11755         if (ret)
11756                 goto out;
11757         btrfs_release_path(&path);
11758
11759         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11760         key.type = BTRFS_ROOT_ITEM_KEY;
11761         key.offset = 0;
11762         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11763         if (ret < 0)
11764                 goto out;
11765         while (1) {
11766                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11767                         if (!found)
11768                                 break;
11769
11770                         if (del_nr) {
11771                                 ret = btrfs_del_items(trans, root, &path,
11772                                                       del_slot, del_nr);
11773                                 del_nr = 0;
11774                                 if (ret)
11775                                         goto out;
11776                         }
11777                         key.offset++;
11778                         btrfs_release_path(&path);
11779
11780                         found = 0;
11781                         ret = btrfs_search_slot(trans, root, &key, &path,
11782                                                 -1, 1);
11783                         if (ret < 0)
11784                                 goto out;
11785                         continue;
11786                 }
11787                 found = 1;
11788                 leaf = path.nodes[0];
11789                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11790                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11791                         break;
11792                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11793                         path.slots[0]++;
11794                         continue;
11795                 }
11796                 if (!del_nr) {
11797                         del_slot = path.slots[0];
11798                         del_nr = 1;
11799                 } else {
11800                         del_nr++;
11801                 }
11802                 path.slots[0]++;
11803         }
11804
11805         if (del_nr) {
11806                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11807                 if (ret)
11808                         goto out;
11809         }
11810         btrfs_release_path(&path);
11811
11812 reinit_data_reloc:
11813         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11814         key.type = BTRFS_ROOT_ITEM_KEY;
11815         key.offset = (u64)-1;
11816         root = btrfs_read_fs_root(fs_info, &key);
11817         if (IS_ERR(root)) {
11818                 fprintf(stderr, "Error reading data reloc tree\n");
11819                 ret = PTR_ERR(root);
11820                 goto out;
11821         }
11822         record_root_in_trans(trans, root);
11823         ret = btrfs_fsck_reinit_root(trans, root, 0);
11824         if (ret)
11825                 goto out;
11826         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11827 out:
11828         btrfs_release_path(&path);
11829         return ret;
11830 }
11831
11832 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11833                               struct btrfs_fs_info *fs_info)
11834 {
11835         u64 start = 0;
11836         int ret;
11837
11838         /*
11839          * The only reason we don't do this is because right now we're just
11840          * walking the trees we find and pinning down their bytes, we don't look
11841          * at any of the leaves.  In order to do mixed groups we'd have to check
11842          * the leaves of any fs roots and pin down the bytes for any file
11843          * extents we find.  Not hard but why do it if we don't have to?
11844          */
11845         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11846                 fprintf(stderr, "We don't support re-initing the extent tree "
11847                         "for mixed block groups yet, please notify a btrfs "
11848                         "developer you want to do this so they can add this "
11849                         "functionality.\n");
11850                 return -EINVAL;
11851         }
11852
11853         /*
11854          * first we need to walk all of the trees except the extent tree and pin
11855          * down the bytes that are in use so we don't overwrite any existing
11856          * metadata.
11857          */
11858         ret = pin_metadata_blocks(fs_info);
11859         if (ret) {
11860                 fprintf(stderr, "error pinning down used bytes\n");
11861                 return ret;
11862         }
11863
11864         /*
11865          * Need to drop all the block groups since we're going to recreate all
11866          * of them again.
11867          */
11868         btrfs_free_block_groups(fs_info);
11869         ret = reset_block_groups(fs_info);
11870         if (ret) {
11871                 fprintf(stderr, "error resetting the block groups\n");
11872                 return ret;
11873         }
11874
11875         /* Ok we can allocate now, reinit the extent root */
11876         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11877         if (ret) {
11878                 fprintf(stderr, "extent root initialization failed\n");
11879                 /*
11880                  * When the transaction code is updated we should end the
11881                  * transaction, but for now progs only knows about commit so
11882                  * just return an error.
11883                  */
11884                 return ret;
11885         }
11886
11887         /*
11888          * Now we have all the in-memory block groups setup so we can make
11889          * allocations properly, and the metadata we care about is safe since we
11890          * pinned all of it above.
11891          */
11892         while (1) {
11893                 struct btrfs_block_group_cache *cache;
11894
11895                 cache = btrfs_lookup_first_block_group(fs_info, start);
11896                 if (!cache)
11897                         break;
11898                 start = cache->key.objectid + cache->key.offset;
11899                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11900                                         &cache->key, &cache->item,
11901                                         sizeof(cache->item));
11902                 if (ret) {
11903                         fprintf(stderr, "Error adding block group\n");
11904                         return ret;
11905                 }
11906                 btrfs_extent_post_op(trans, fs_info->extent_root);
11907         }
11908
11909         ret = reset_balance(trans, fs_info);
11910         if (ret)
11911                 fprintf(stderr, "error resetting the pending balance\n");
11912
11913         return ret;
11914 }
11915
11916 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11917 {
11918         struct btrfs_path path;
11919         struct btrfs_trans_handle *trans;
11920         struct btrfs_key key;
11921         int ret;
11922
11923         printf("Recowing metadata block %llu\n", eb->start);
11924         key.objectid = btrfs_header_owner(eb);
11925         key.type = BTRFS_ROOT_ITEM_KEY;
11926         key.offset = (u64)-1;
11927
11928         root = btrfs_read_fs_root(root->fs_info, &key);
11929         if (IS_ERR(root)) {
11930                 fprintf(stderr, "Couldn't find owner root %llu\n",
11931                         key.objectid);
11932                 return PTR_ERR(root);
11933         }
11934
11935         trans = btrfs_start_transaction(root, 1);
11936         if (IS_ERR(trans))
11937                 return PTR_ERR(trans);
11938
11939         btrfs_init_path(&path);
11940         path.lowest_level = btrfs_header_level(eb);
11941         if (path.lowest_level)
11942                 btrfs_node_key_to_cpu(eb, &key, 0);
11943         else
11944                 btrfs_item_key_to_cpu(eb, &key, 0);
11945
11946         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11947         btrfs_commit_transaction(trans, root);
11948         btrfs_release_path(&path);
11949         return ret;
11950 }
11951
11952 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11953 {
11954         struct btrfs_path path;
11955         struct btrfs_trans_handle *trans;
11956         struct btrfs_key key;
11957         int ret;
11958
11959         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11960                bad->key.type, bad->key.offset);
11961         key.objectid = bad->root_id;
11962         key.type = BTRFS_ROOT_ITEM_KEY;
11963         key.offset = (u64)-1;
11964
11965         root = btrfs_read_fs_root(root->fs_info, &key);
11966         if (IS_ERR(root)) {
11967                 fprintf(stderr, "Couldn't find owner root %llu\n",
11968                         key.objectid);
11969                 return PTR_ERR(root);
11970         }
11971
11972         trans = btrfs_start_transaction(root, 1);
11973         if (IS_ERR(trans))
11974                 return PTR_ERR(trans);
11975
11976         btrfs_init_path(&path);
11977         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11978         if (ret) {
11979                 if (ret > 0)
11980                         ret = 0;
11981                 goto out;
11982         }
11983         ret = btrfs_del_item(trans, root, &path);
11984 out:
11985         btrfs_commit_transaction(trans, root);
11986         btrfs_release_path(&path);
11987         return ret;
11988 }
11989
11990 static int zero_log_tree(struct btrfs_root *root)
11991 {
11992         struct btrfs_trans_handle *trans;
11993         int ret;
11994
11995         trans = btrfs_start_transaction(root, 1);
11996         if (IS_ERR(trans)) {
11997                 ret = PTR_ERR(trans);
11998                 return ret;
11999         }
12000         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12001         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12002         ret = btrfs_commit_transaction(trans, root);
12003         return ret;
12004 }
12005
12006 static int populate_csum(struct btrfs_trans_handle *trans,
12007                          struct btrfs_root *csum_root, char *buf, u64 start,
12008                          u64 len)
12009 {
12010         u64 offset = 0;
12011         u64 sectorsize;
12012         int ret = 0;
12013
12014         while (offset < len) {
12015                 sectorsize = csum_root->sectorsize;
12016                 ret = read_extent_data(csum_root, buf, start + offset,
12017                                        &sectorsize, 0);
12018                 if (ret)
12019                         break;
12020                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12021                                             start + offset, buf, sectorsize);
12022                 if (ret)
12023                         break;
12024                 offset += sectorsize;
12025         }
12026         return ret;
12027 }
12028
12029 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12030                                       struct btrfs_root *csum_root,
12031                                       struct btrfs_root *cur_root)
12032 {
12033         struct btrfs_path path;
12034         struct btrfs_key key;
12035         struct extent_buffer *node;
12036         struct btrfs_file_extent_item *fi;
12037         char *buf = NULL;
12038         u64 start = 0;
12039         u64 len = 0;
12040         int slot = 0;
12041         int ret = 0;
12042
12043         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12044         if (!buf)
12045                 return -ENOMEM;
12046
12047         btrfs_init_path(&path);
12048         key.objectid = 0;
12049         key.offset = 0;
12050         key.type = 0;
12051         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12052         if (ret < 0)
12053                 goto out;
12054         /* Iterate all regular file extents and fill its csum */
12055         while (1) {
12056                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12057
12058                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12059                         goto next;
12060                 node = path.nodes[0];
12061                 slot = path.slots[0];
12062                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12063                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12064                         goto next;
12065                 start = btrfs_file_extent_disk_bytenr(node, fi);
12066                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12067
12068                 ret = populate_csum(trans, csum_root, buf, start, len);
12069                 if (ret == -EEXIST)
12070                         ret = 0;
12071                 if (ret < 0)
12072                         goto out;
12073 next:
12074                 /*
12075                  * TODO: if next leaf is corrupted, jump to nearest next valid
12076                  * leaf.
12077                  */
12078                 ret = btrfs_next_item(cur_root, &path);
12079                 if (ret < 0)
12080                         goto out;
12081                 if (ret > 0) {
12082                         ret = 0;
12083                         goto out;
12084                 }
12085         }
12086
12087 out:
12088         btrfs_release_path(&path);
12089         free(buf);
12090         return ret;
12091 }
12092
12093 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12094                                   struct btrfs_root *csum_root)
12095 {
12096         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12097         struct btrfs_path path;
12098         struct btrfs_root *tree_root = fs_info->tree_root;
12099         struct btrfs_root *cur_root;
12100         struct extent_buffer *node;
12101         struct btrfs_key key;
12102         int slot = 0;
12103         int ret = 0;
12104
12105         btrfs_init_path(&path);
12106         key.objectid = BTRFS_FS_TREE_OBJECTID;
12107         key.offset = 0;
12108         key.type = BTRFS_ROOT_ITEM_KEY;
12109         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12110         if (ret < 0)
12111                 goto out;
12112         if (ret > 0) {
12113                 ret = -ENOENT;
12114                 goto out;
12115         }
12116
12117         while (1) {
12118                 node = path.nodes[0];
12119                 slot = path.slots[0];
12120                 btrfs_item_key_to_cpu(node, &key, slot);
12121                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12122                         goto out;
12123                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12124                         goto next;
12125                 if (!is_fstree(key.objectid))
12126                         goto next;
12127                 key.offset = (u64)-1;
12128
12129                 cur_root = btrfs_read_fs_root(fs_info, &key);
12130                 if (IS_ERR(cur_root) || !cur_root) {
12131                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12132                                 key.objectid);
12133                         goto out;
12134                 }
12135                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12136                                 cur_root);
12137                 if (ret < 0)
12138                         goto out;
12139 next:
12140                 ret = btrfs_next_item(tree_root, &path);
12141                 if (ret > 0) {
12142                         ret = 0;
12143                         goto out;
12144                 }
12145                 if (ret < 0)
12146                         goto out;
12147         }
12148
12149 out:
12150         btrfs_release_path(&path);
12151         return ret;
12152 }
12153
12154 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12155                                       struct btrfs_root *csum_root)
12156 {
12157         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12158         struct btrfs_path path;
12159         struct btrfs_extent_item *ei;
12160         struct extent_buffer *leaf;
12161         char *buf;
12162         struct btrfs_key key;
12163         int ret;
12164
12165         btrfs_init_path(&path);
12166         key.objectid = 0;
12167         key.type = BTRFS_EXTENT_ITEM_KEY;
12168         key.offset = 0;
12169         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12170         if (ret < 0) {
12171                 btrfs_release_path(&path);
12172                 return ret;
12173         }
12174
12175         buf = malloc(csum_root->sectorsize);
12176         if (!buf) {
12177                 btrfs_release_path(&path);
12178                 return -ENOMEM;
12179         }
12180
12181         while (1) {
12182                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12183                         ret = btrfs_next_leaf(extent_root, &path);
12184                         if (ret < 0)
12185                                 break;
12186                         if (ret) {
12187                                 ret = 0;
12188                                 break;
12189                         }
12190                 }
12191                 leaf = path.nodes[0];
12192
12193                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12194                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12195                         path.slots[0]++;
12196                         continue;
12197                 }
12198
12199                 ei = btrfs_item_ptr(leaf, path.slots[0],
12200                                     struct btrfs_extent_item);
12201                 if (!(btrfs_extent_flags(leaf, ei) &
12202                       BTRFS_EXTENT_FLAG_DATA)) {
12203                         path.slots[0]++;
12204                         continue;
12205                 }
12206
12207                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12208                                     key.offset);
12209                 if (ret)
12210                         break;
12211                 path.slots[0]++;
12212         }
12213
12214         btrfs_release_path(&path);
12215         free(buf);
12216         return ret;
12217 }
12218
/*
 * Recalculate the checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source of data extents. When
 * @search_fs_tree is non-zero the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
12235
12236 static void free_roots_info_cache(void)
12237 {
12238         if (!roots_info_cache)
12239                 return;
12240
12241         while (!cache_tree_empty(roots_info_cache)) {
12242                 struct cache_extent *entry;
12243                 struct root_item_info *rii;
12244
12245                 entry = first_cache_extent(roots_info_cache);
12246                 if (!entry)
12247                         break;
12248                 remove_cache_extent(roots_info_cache, entry);
12249                 rii = container_of(entry, struct root_item_info, cache_extent);
12250                 free(rii);
12251         }
12252
12253         free(roots_info_cache);
12254         roots_info_cache = NULL;
12255 }
12256
12257 static int build_roots_info_cache(struct btrfs_fs_info *info)
12258 {
12259         int ret = 0;
12260         struct btrfs_key key;
12261         struct extent_buffer *leaf;
12262         struct btrfs_path path;
12263
12264         if (!roots_info_cache) {
12265                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12266                 if (!roots_info_cache)
12267                         return -ENOMEM;
12268                 cache_tree_init(roots_info_cache);
12269         }
12270
12271         btrfs_init_path(&path);
12272         key.objectid = 0;
12273         key.type = BTRFS_EXTENT_ITEM_KEY;
12274         key.offset = 0;
12275         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12276         if (ret < 0)
12277                 goto out;
12278         leaf = path.nodes[0];
12279
12280         while (1) {
12281                 struct btrfs_key found_key;
12282                 struct btrfs_extent_item *ei;
12283                 struct btrfs_extent_inline_ref *iref;
12284                 int slot = path.slots[0];
12285                 int type;
12286                 u64 flags;
12287                 u64 root_id;
12288                 u8 level;
12289                 struct cache_extent *entry;
12290                 struct root_item_info *rii;
12291
12292                 if (slot >= btrfs_header_nritems(leaf)) {
12293                         ret = btrfs_next_leaf(info->extent_root, &path);
12294                         if (ret < 0) {
12295                                 break;
12296                         } else if (ret) {
12297                                 ret = 0;
12298                                 break;
12299                         }
12300                         leaf = path.nodes[0];
12301                         slot = path.slots[0];
12302                 }
12303
12304                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12305
12306                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12307                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12308                         goto next;
12309
12310                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12311                 flags = btrfs_extent_flags(leaf, ei);
12312
12313                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12314                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12315                         goto next;
12316
12317                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12318                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12319                         level = found_key.offset;
12320                 } else {
12321                         struct btrfs_tree_block_info *binfo;
12322
12323                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12324                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12325                         level = btrfs_tree_block_level(leaf, binfo);
12326                 }
12327
12328                 /*
12329                  * For a root extent, it must be of the following type and the
12330                  * first (and only one) iref in the item.
12331                  */
12332                 type = btrfs_extent_inline_ref_type(leaf, iref);
12333                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12334                         goto next;
12335
12336                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12337                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12338                 if (!entry) {
12339                         rii = malloc(sizeof(struct root_item_info));
12340                         if (!rii) {
12341                                 ret = -ENOMEM;
12342                                 goto out;
12343                         }
12344                         rii->cache_extent.start = root_id;
12345                         rii->cache_extent.size = 1;
12346                         rii->level = (u8)-1;
12347                         entry = &rii->cache_extent;
12348                         ret = insert_cache_extent(roots_info_cache, entry);
12349                         ASSERT(ret == 0);
12350                 } else {
12351                         rii = container_of(entry, struct root_item_info,
12352                                            cache_extent);
12353                 }
12354
12355                 ASSERT(rii->cache_extent.start == root_id);
12356                 ASSERT(rii->cache_extent.size == 1);
12357
12358                 if (level > rii->level || rii->level == (u8)-1) {
12359                         rii->level = level;
12360                         rii->bytenr = found_key.objectid;
12361                         rii->gen = btrfs_extent_generation(leaf, ei);
12362                         rii->node_count = 1;
12363                 } else if (level == rii->level) {
12364                         rii->node_count++;
12365                 }
12366 next:
12367                 path.slots[0]++;
12368         }
12369
12370 out:
12371         btrfs_release_path(&path);
12372
12373         return ret;
12374 }
12375
12376 static int maybe_repair_root_item(struct btrfs_path *path,
12377                                   const struct btrfs_key *root_key,
12378                                   const int read_only_mode)
12379 {
12380         const u64 root_id = root_key->objectid;
12381         struct cache_extent *entry;
12382         struct root_item_info *rii;
12383         struct btrfs_root_item ri;
12384         unsigned long offset;
12385
12386         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12387         if (!entry) {
12388                 fprintf(stderr,
12389                         "Error: could not find extent items for root %llu\n",
12390                         root_key->objectid);
12391                 return -ENOENT;
12392         }
12393
12394         rii = container_of(entry, struct root_item_info, cache_extent);
12395         ASSERT(rii->cache_extent.start == root_id);
12396         ASSERT(rii->cache_extent.size == 1);
12397
12398         if (rii->node_count != 1) {
12399                 fprintf(stderr,
12400                         "Error: could not find btree root extent for root %llu\n",
12401                         root_id);
12402                 return -ENOENT;
12403         }
12404
12405         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12406         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12407
12408         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12409             btrfs_root_level(&ri) != rii->level ||
12410             btrfs_root_generation(&ri) != rii->gen) {
12411
12412                 /*
12413                  * If we're in repair mode but our caller told us to not update
12414                  * the root item, i.e. just check if it needs to be updated, don't
12415                  * print this message, since the caller will call us again shortly
12416                  * for the same root item without read only mode (the caller will
12417                  * open a transaction first).
12418                  */
12419                 if (!(read_only_mode && repair))
12420                         fprintf(stderr,
12421                                 "%sroot item for root %llu,"
12422                                 " current bytenr %llu, current gen %llu, current level %u,"
12423                                 " new bytenr %llu, new gen %llu, new level %u\n",
12424                                 (read_only_mode ? "" : "fixing "),
12425                                 root_id,
12426                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12427                                 btrfs_root_level(&ri),
12428                                 rii->bytenr, rii->gen, rii->level);
12429
12430                 if (btrfs_root_generation(&ri) > rii->gen) {
12431                         fprintf(stderr,
12432                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12433                                 root_id, btrfs_root_generation(&ri), rii->gen);
12434                         return -EINVAL;
12435                 }
12436
12437                 if (!read_only_mode) {
12438                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12439                         btrfs_set_root_level(&ri, rii->level);
12440                         btrfs_set_root_generation(&ri, rii->gen);
12441                         write_extent_buffer(path->nodes[0], &ri,
12442                                             offset, sizeof(ri));
12443                 }
12444
12445                 return 1;
12446         }
12447
12448         return 0;
12449 }
12450
12451 /*
12452  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12453  * caused read-only snapshots to be corrupted if they were created at a moment
12454  * when the source subvolume/snapshot had orphan items. The issue was that the
12455  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12456  * node instead of the post orphan cleanup root node.
12457  * So this function, and its callees, just detects and fixes those cases. Even
12458  * though the regression was for read-only snapshots, this function applies to
12459  * any snapshot/subvolume root.
12460  * This must be run before any other repair code - not doing it so, makes other
12461  * repair code delete or modify backrefs in the extent tree for example, which
12462  * will result in an inconsistent fs after repairing the root items.
12463  */
12464 static int repair_root_items(struct btrfs_fs_info *info)
12465 {
12466         struct btrfs_path path;
12467         struct btrfs_key key;
12468         struct extent_buffer *leaf;
12469         struct btrfs_trans_handle *trans = NULL;
12470         int ret = 0;
12471         int bad_roots = 0;
12472         int need_trans = 0;
12473
12474         btrfs_init_path(&path);
12475
12476         ret = build_roots_info_cache(info);
12477         if (ret)
12478                 goto out;
12479
12480         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12481         key.type = BTRFS_ROOT_ITEM_KEY;
12482         key.offset = 0;
12483
12484 again:
12485         /*
12486          * Avoid opening and committing transactions if a leaf doesn't have
12487          * any root items that need to be fixed, so that we avoid rotating
12488          * backup roots unnecessarily.
12489          */
12490         if (need_trans) {
12491                 trans = btrfs_start_transaction(info->tree_root, 1);
12492                 if (IS_ERR(trans)) {
12493                         ret = PTR_ERR(trans);
12494                         goto out;
12495                 }
12496         }
12497
12498         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12499                                 0, trans ? 1 : 0);
12500         if (ret < 0)
12501                 goto out;
12502         leaf = path.nodes[0];
12503
12504         while (1) {
12505                 struct btrfs_key found_key;
12506
12507                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12508                         int no_more_keys = find_next_key(&path, &key);
12509
12510                         btrfs_release_path(&path);
12511                         if (trans) {
12512                                 ret = btrfs_commit_transaction(trans,
12513                                                                info->tree_root);
12514                                 trans = NULL;
12515                                 if (ret < 0)
12516                                         goto out;
12517                         }
12518                         need_trans = 0;
12519                         if (no_more_keys)
12520                                 break;
12521                         goto again;
12522                 }
12523
12524                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12525
12526                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12527                         goto next;
12528                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12529                         goto next;
12530
12531                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12532                 if (ret < 0)
12533                         goto out;
12534                 if (ret) {
12535                         if (!trans && repair) {
12536                                 need_trans = 1;
12537                                 key = found_key;
12538                                 btrfs_release_path(&path);
12539                                 goto again;
12540                         }
12541                         bad_roots++;
12542                 }
12543 next:
12544                 path.slots[0]++;
12545         }
12546         ret = 0;
12547 out:
12548         free_roots_info_cache();
12549         btrfs_release_path(&path);
12550         if (trans)
12551                 btrfs_commit_transaction(trans, info->tree_root);
12552         if (ret < 0)
12553                 return ret;
12554
12555         return bad_roots;
12556 }
12557
12558 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12559 {
12560         struct btrfs_trans_handle *trans;
12561         struct btrfs_block_group_cache *bg_cache;
12562         u64 current = 0;
12563         int ret = 0;
12564
12565         /* Clear all free space cache inodes and its extent data */
12566         while (1) {
12567                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12568                 if (!bg_cache)
12569                         break;
12570                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12571                 if (ret < 0)
12572                         return ret;
12573                 current = bg_cache->key.objectid + bg_cache->key.offset;
12574         }
12575
12576         /* Don't forget to set cache_generation to -1 */
12577         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12578         if (IS_ERR(trans)) {
12579                 error("failed to update super block cache generation");
12580                 return PTR_ERR(trans);
12581         }
12582         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12583         btrfs_commit_transaction(trans, fs_info->tree_root);
12584
12585         return ret;
12586 }
12587
/*
 * Usage text for "btrfs check", as a NULL-terminated array of lines.
 *
 * Passed to usage() by cmd_check(). The first entry is the synopsis, the
 * next the short description, followed by the long description and one or
 * more lines per option.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        /* Sentinel marking the end of the list. */
        NULL
};
12618
12619 int cmd_check(int argc, char **argv)
12620 {
12621         struct cache_tree root_cache;
12622         struct btrfs_root *root;
12623         struct btrfs_fs_info *info;
12624         u64 bytenr = 0;
12625         u64 subvolid = 0;
12626         u64 tree_root_bytenr = 0;
12627         u64 chunk_root_bytenr = 0;
12628         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12629         int ret;
12630         int err = 0;
12631         u64 num;
12632         int init_csum_tree = 0;
12633         int readonly = 0;
12634         int clear_space_cache = 0;
12635         int qgroup_report = 0;
12636         int qgroups_repaired = 0;
12637         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12638
12639         while(1) {
12640                 int c;
12641                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12642                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12643                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12644                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12645                 static const struct option long_options[] = {
12646                         { "super", required_argument, NULL, 's' },
12647                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12648                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12649                         { "init-csum-tree", no_argument, NULL,
12650                                 GETOPT_VAL_INIT_CSUM },
12651                         { "init-extent-tree", no_argument, NULL,
12652                                 GETOPT_VAL_INIT_EXTENT },
12653                         { "check-data-csum", no_argument, NULL,
12654                                 GETOPT_VAL_CHECK_CSUM },
12655                         { "backup", no_argument, NULL, 'b' },
12656                         { "subvol-extents", required_argument, NULL, 'E' },
12657                         { "qgroup-report", no_argument, NULL, 'Q' },
12658                         { "tree-root", required_argument, NULL, 'r' },
12659                         { "chunk-root", required_argument, NULL,
12660                                 GETOPT_VAL_CHUNK_TREE },
12661                         { "progress", no_argument, NULL, 'p' },
12662                         { "mode", required_argument, NULL,
12663                                 GETOPT_VAL_MODE },
12664                         { "clear-space-cache", required_argument, NULL,
12665                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12666                         { NULL, 0, NULL, 0}
12667                 };
12668
12669                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12670                 if (c < 0)
12671                         break;
12672                 switch(c) {
12673                         case 'a': /* ignored */ break;
12674                         case 'b':
12675                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12676                                 break;
12677                         case 's':
12678                                 num = arg_strtou64(optarg);
12679                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12680                                         error(
12681                                         "super mirror should be less than %d",
12682                                                 BTRFS_SUPER_MIRROR_MAX);
12683                                         exit(1);
12684                                 }
12685                                 bytenr = btrfs_sb_offset(((int)num));
12686                                 printf("using SB copy %llu, bytenr %llu\n", num,
12687                                        (unsigned long long)bytenr);
12688                                 break;
12689                         case 'Q':
12690                                 qgroup_report = 1;
12691                                 break;
12692                         case 'E':
12693                                 subvolid = arg_strtou64(optarg);
12694                                 break;
12695                         case 'r':
12696                                 tree_root_bytenr = arg_strtou64(optarg);
12697                                 break;
12698                         case GETOPT_VAL_CHUNK_TREE:
12699                                 chunk_root_bytenr = arg_strtou64(optarg);
12700                                 break;
12701                         case 'p':
12702                                 ctx.progress_enabled = true;
12703                                 break;
12704                         case '?':
12705                         case 'h':
12706                                 usage(cmd_check_usage);
12707                         case GETOPT_VAL_REPAIR:
12708                                 printf("enabling repair mode\n");
12709                                 repair = 1;
12710                                 ctree_flags |= OPEN_CTREE_WRITES;
12711                                 break;
12712                         case GETOPT_VAL_READONLY:
12713                                 readonly = 1;
12714                                 break;
12715                         case GETOPT_VAL_INIT_CSUM:
12716                                 printf("Creating a new CRC tree\n");
12717                                 init_csum_tree = 1;
12718                                 repair = 1;
12719                                 ctree_flags |= OPEN_CTREE_WRITES;
12720                                 break;
12721                         case GETOPT_VAL_INIT_EXTENT:
12722                                 init_extent_tree = 1;
12723                                 ctree_flags |= (OPEN_CTREE_WRITES |
12724                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12725                                 repair = 1;
12726                                 break;
12727                         case GETOPT_VAL_CHECK_CSUM:
12728                                 check_data_csum = 1;
12729                                 break;
12730                         case GETOPT_VAL_MODE:
12731                                 check_mode = parse_check_mode(optarg);
12732                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12733                                         error("unknown mode: %s", optarg);
12734                                         exit(1);
12735                                 }
12736                                 break;
12737                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12738                                 if (strcmp(optarg, "v1") == 0) {
12739                                         clear_space_cache = 1;
12740                                 } else if (strcmp(optarg, "v2") == 0) {
12741                                         clear_space_cache = 2;
12742                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12743                                 } else {
12744                                         error(
12745                 "invalid argument to --clear-space-cache, must be v1 or v2");
12746                                         exit(1);
12747                                 }
12748                                 ctree_flags |= OPEN_CTREE_WRITES;
12749                                 break;
12750                 }
12751         }
12752
12753         if (check_argc_exact(argc - optind, 1))
12754                 usage(cmd_check_usage);
12755
12756         if (ctx.progress_enabled) {
12757                 ctx.tp = TASK_NOTHING;
12758                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12759         }
12760
12761         /* This check is the only reason for --readonly to exist */
12762         if (readonly && repair) {
12763                 error("repair options are not compatible with --readonly");
12764                 exit(1);
12765         }
12766
12767         /*
12768          * Not supported yet
12769          */
12770         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12771                 error("low memory mode doesn't support repair yet");
12772                 exit(1);
12773         }
12774
12775         radix_tree_init();
12776         cache_tree_init(&root_cache);
12777
12778         if((ret = check_mounted(argv[optind])) < 0) {
12779                 error("could not check mount status: %s", strerror(-ret));
12780                 err |= !!ret;
12781                 goto err_out;
12782         } else if(ret) {
12783                 error("%s is currently mounted, aborting", argv[optind]);
12784                 ret = -EBUSY;
12785                 err |= !!ret;
12786                 goto err_out;
12787         }
12788
12789         /* only allow partial opening under repair mode */
12790         if (repair)
12791                 ctree_flags |= OPEN_CTREE_PARTIAL;
12792
12793         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12794                                   chunk_root_bytenr, ctree_flags);
12795         if (!info) {
12796                 error("cannot open file system");
12797                 ret = -EIO;
12798                 err |= !!ret;
12799                 goto err_out;
12800         }
12801
12802         global_info = info;
12803         root = info->fs_root;
12804         if (clear_space_cache == 1) {
12805                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12806                         error(
12807                 "free space cache v2 detected, use --clear-space-cache v2");
12808                         ret = 1;
12809                         goto close_out;
12810                 }
12811                 printf("Clearing free space cache\n");
12812                 ret = clear_free_space_cache(info);
12813                 if (ret) {
12814                         error("failed to clear free space cache");
12815                         ret = 1;
12816                 } else {
12817                         printf("Free space cache cleared\n");
12818                 }
12819                 goto close_out;
12820         } else if (clear_space_cache == 2) {
12821                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12822                         printf("no free space cache v2 to clear\n");
12823                         ret = 0;
12824                         goto close_out;
12825                 }
12826                 printf("Clear free space cache v2\n");
12827                 ret = btrfs_clear_free_space_tree(info);
12828                 if (ret) {
12829                         error("failed to clear free space cache v2: %d", ret);
12830                         ret = 1;
12831                 } else {
12832                         printf("free space cache v2 cleared\n");
12833                 }
12834                 goto close_out;
12835         }
12836
12837         /*
12838          * repair mode will force us to commit transaction which
12839          * will make us fail to load log tree when mounting.
12840          */
12841         if (repair && btrfs_super_log_root(info->super_copy)) {
12842                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12843                 if (!ret) {
12844                         ret = 1;
12845                         err |= !!ret;
12846                         goto close_out;
12847                 }
12848                 ret = zero_log_tree(root);
12849                 err |= !!ret;
12850                 if (ret) {
12851                         error("failed to zero log tree: %d", ret);
12852                         goto close_out;
12853                 }
12854         }
12855
12856         uuid_unparse(info->super_copy->fsid, uuidbuf);
12857         if (qgroup_report) {
12858                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12859                        uuidbuf);
12860                 ret = qgroup_verify_all(info);
12861                 err |= !!ret;
12862                 if (ret == 0)
12863                         report_qgroups(1);
12864                 goto close_out;
12865         }
12866         if (subvolid) {
12867                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12868                        subvolid, argv[optind], uuidbuf);
12869                 ret = print_extent_state(info, subvolid);
12870                 err |= !!ret;
12871                 goto close_out;
12872         }
12873         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12874
12875         if (!extent_buffer_uptodate(info->tree_root->node) ||
12876             !extent_buffer_uptodate(info->dev_root->node) ||
12877             !extent_buffer_uptodate(info->chunk_root->node)) {
12878                 error("critical roots corrupted, unable to check the filesystem");
12879                 err |= !!ret;
12880                 ret = -EIO;
12881                 goto close_out;
12882         }
12883
12884         if (init_extent_tree || init_csum_tree) {
12885                 struct btrfs_trans_handle *trans;
12886
12887                 trans = btrfs_start_transaction(info->extent_root, 0);
12888                 if (IS_ERR(trans)) {
12889                         error("error starting transaction");
12890                         ret = PTR_ERR(trans);
12891                         err |= !!ret;
12892                         goto close_out;
12893                 }
12894
12895                 if (init_extent_tree) {
12896                         printf("Creating a new extent tree\n");
12897                         ret = reinit_extent_tree(trans, info);
12898                         err |= !!ret;
12899                         if (ret)
12900                                 goto close_out;
12901                 }
12902
12903                 if (init_csum_tree) {
12904                         printf("Reinitialize checksum tree\n");
12905                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12906                         if (ret) {
12907                                 error("checksum tree initialization failed: %d",
12908                                                 ret);
12909                                 ret = -EIO;
12910                                 err |= !!ret;
12911                                 goto close_out;
12912                         }
12913
12914                         ret = fill_csum_tree(trans, info->csum_root,
12915                                              init_extent_tree);
12916                         err |= !!ret;
12917                         if (ret) {
12918                                 error("checksum tree refilling failed: %d", ret);
12919                                 return -EIO;
12920                         }
12921                 }
12922                 /*
12923                  * Ok now we commit and run the normal fsck, which will add
12924                  * extent entries for all of the items it finds.
12925                  */
12926                 ret = btrfs_commit_transaction(trans, info->extent_root);
12927                 err |= !!ret;
12928                 if (ret)
12929                         goto close_out;
12930         }
12931         if (!extent_buffer_uptodate(info->extent_root->node)) {
12932                 error("critical: extent_root, unable to check the filesystem");
12933                 ret = -EIO;
12934                 err |= !!ret;
12935                 goto close_out;
12936         }
12937         if (!extent_buffer_uptodate(info->csum_root->node)) {
12938                 error("critical: csum_root, unable to check the filesystem");
12939                 ret = -EIO;
12940                 err |= !!ret;
12941                 goto close_out;
12942         }
12943
12944         if (!ctx.progress_enabled)
12945                 fprintf(stderr, "checking extents\n");
12946         if (check_mode == CHECK_MODE_LOWMEM)
12947                 ret = check_chunks_and_extents_v2(root);
12948         else
12949                 ret = check_chunks_and_extents(root);
12950         err |= !!ret;
12951         if (ret)
12952                 error(
12953                 "errors found in extent allocation tree or chunk allocation");
12954
12955         ret = repair_root_items(info);
12956         err |= !!ret;
12957         if (ret < 0) {
12958                 error("failed to repair root items: %s", strerror(-ret));
12959                 goto close_out;
12960         }
12961         if (repair) {
12962                 fprintf(stderr, "Fixed %d roots.\n", ret);
12963                 ret = 0;
12964         } else if (ret > 0) {
12965                 fprintf(stderr,
12966                        "Found %d roots with an outdated root item.\n",
12967                        ret);
12968                 fprintf(stderr,
12969                         "Please run a filesystem check with the option --repair to fix them.\n");
12970                 ret = 1;
12971                 err |= !!ret;
12972                 goto close_out;
12973         }
12974
12975         if (!ctx.progress_enabled) {
12976                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12977                         fprintf(stderr, "checking free space tree\n");
12978                 else
12979                         fprintf(stderr, "checking free space cache\n");
12980         }
12981         ret = check_space_cache(root);
12982         err |= !!ret;
12983         if (ret) {
12984                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12985                         error("errors found in free space tree");
12986                 else
12987                         error("errors found in free space cache");
12988                 goto out;
12989         }
12990
12991         /*
12992          * We used to have to have these hole extents in between our real
12993          * extents so if we don't have this flag set we need to make sure there
12994          * are no gaps in the file extents for inodes, otherwise we can just
12995          * ignore it when this happens.
12996          */
12997         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12998         if (!ctx.progress_enabled)
12999                 fprintf(stderr, "checking fs roots\n");
13000         if (check_mode == CHECK_MODE_LOWMEM)
13001                 ret = check_fs_roots_v2(root->fs_info);
13002         else
13003                 ret = check_fs_roots(root, &root_cache);
13004         err |= !!ret;
13005         if (ret) {
13006                 error("errors found in fs roots");
13007                 goto out;
13008         }
13009
13010         fprintf(stderr, "checking csums\n");
13011         ret = check_csums(root);
13012         err |= !!ret;
13013         if (ret) {
13014                 error("errors found in csum tree");
13015                 goto out;
13016         }
13017
13018         fprintf(stderr, "checking root refs\n");
13019         /* For low memory mode, check_fs_roots_v2 handles root refs */
13020         if (check_mode != CHECK_MODE_LOWMEM) {
13021                 ret = check_root_refs(root, &root_cache);
13022                 err |= !!ret;
13023                 if (ret) {
13024                         error("errors found in root refs");
13025                         goto out;
13026                 }
13027         }
13028
13029         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13030                 struct extent_buffer *eb;
13031
13032                 eb = list_first_entry(&root->fs_info->recow_ebs,
13033                                       struct extent_buffer, recow);
13034                 list_del_init(&eb->recow);
13035                 ret = recow_extent_buffer(root, eb);
13036                 err |= !!ret;
13037                 if (ret) {
13038                         error("fails to fix transid errors");
13039                         break;
13040                 }
13041         }
13042
13043         while (!list_empty(&delete_items)) {
13044                 struct bad_item *bad;
13045
13046                 bad = list_first_entry(&delete_items, struct bad_item, list);
13047                 list_del_init(&bad->list);
13048                 if (repair) {
13049                         ret = delete_bad_item(root, bad);
13050                         err |= !!ret;
13051                 }
13052                 free(bad);
13053         }
13054
13055         if (info->quota_enabled) {
13056                 fprintf(stderr, "checking quota groups\n");
13057                 ret = qgroup_verify_all(info);
13058                 err |= !!ret;
13059                 if (ret) {
13060                         error("failed to check quota groups");
13061                         goto out;
13062                 }
13063                 report_qgroups(0);
13064                 ret = repair_qgroups(info, &qgroups_repaired);
13065                 err |= !!ret;
13066                 if (err) {
13067                         error("failed to repair quota groups");
13068                         goto out;
13069                 }
13070                 ret = 0;
13071         }
13072
13073         if (!list_empty(&root->fs_info->recow_ebs)) {
13074                 error("transid errors in file system");
13075                 ret = 1;
13076                 err |= !!ret;
13077         }
13078 out:
13079         if (found_old_backref) { /*
13080                  * there was a disk format change when mixed
13081                  * backref was in testing tree. The old format
13082                  * existed about one week.
13083                  */
13084                 printf("\n * Found old mixed backref format. "
13085                        "The old format is not supported! *"
13086                        "\n * Please mount the FS in readonly mode, "
13087                        "backup data and re-format the FS. *\n\n");
13088                 err |= 1;
13089         }
13090         printf("found %llu bytes used, ",
13091                (unsigned long long)bytes_used);
13092         if (err)
13093                 printf("error(s) found\n");
13094         else
13095                 printf("no error found\n");
13096         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13097         printf("total tree bytes: %llu\n",
13098                (unsigned long long)total_btree_bytes);
13099         printf("total fs tree bytes: %llu\n",
13100                (unsigned long long)total_fs_tree_bytes);
13101         printf("total extent tree bytes: %llu\n",
13102                (unsigned long long)total_extent_tree_bytes);
13103         printf("btree space waste bytes: %llu\n",
13104                (unsigned long long)btree_space_waste);
13105         printf("file data blocks allocated: %llu\n referenced %llu\n",
13106                 (unsigned long long)data_bytes_allocated,
13107                 (unsigned long long)data_bytes_referenced);
13108
13109         free_qgroup_counts();
13110         free_root_recs_tree(&root_cache);
13111 close_out:
13112         close_ctree(root);
13113 err_out:
13114         if (ctx.progress_enabled)
13115                 task_deinit(ctx.info);
13116
13117         return err;
13118 }