btrfs-progs: check: prevent attempt to insert extent record with max_size==0
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase of the check that the progress indicator reports.
 *
 * print_status_check() uses the value as an index into its table of status
 * strings, which has entries for the first three values only; TASK_NOTHING
 * makes that function return before it prints anything.
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* have to be the last element */
52 };
53
/*
 * State handed to the progress-indicator callbacks; print_status_check()
 * receives a pointer to it as its opaque argument.
 */
54 struct task_ctx {
55         int progress_enabled; /* not read in this part of the file; presumably gates progress output -- TODO confirm */
56         enum task_position tp; /* phase whose status string is printed */
57
58         struct task_info *info; /* handle passed to task_period_start()/task_period_wait() */
59 };
60
/*
 * File-scope state shared by the check passes.
 *
 * NOTE(review): only `ctx` has a visible user type in this part of the file
 * and none of these variables is read or written here, so what each counter
 * or flag tracks has to be confirmed against its users further down.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Which checker implementation to run.
 *
 * parse_check_mode() maps the strings "lowmem", "orig" and "original" to
 * these values and returns CHECK_MODE_UNKNOWN for anything else.
 * CHECK_MODE_DEFAULT is an alias for CHECK_MODE_ORIGINAL.
 */
79 enum btrfs_check_mode {
80         CHECK_MODE_ORIGINAL,
81         CHECK_MODE_LOWMEM,
82         CHECK_MODE_UNKNOWN,
83         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 };
85
/* Mode in effect; starts out as the default (original) mode. */
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Header common to both kinds of back reference: struct data_backref and
 * struct tree_backref each embed it as their first member `node`.
 * to_extent_backref() recovers it from its `list` link.
 *
 * NOTE(review): the meaning of the individual flag bits is not established
 * by this part of the file; `is_data` presumably tells the two embedding
 * types apart -- confirm at the users.
 */
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Back reference to a data extent.  Embeds the common extent_backref header
 * as `node`; to_data_backref() converts from that header back to this type.
 *
 * `parent` and `root` share storage through the anonymous union.
 * NOTE(review): which of the two is valid is presumably selected by
 * node.full_backref -- confirm at the users.  Note that this struct has its
 * own u32 `found_ref` in addition to the one-bit node.found_ref.
 */
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/*
 * Error bits, one bit each so that several can be OR-ed into a single value.
 * Bit 0 is not assigned in this list.
 *
 * NOTE(review): none of these is used in this part of the file; confirm the
 * consumers before renumbering or extending.
 */
117 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
120 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
123 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
131 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, but with the undetermined members removed and
142  * linked through a plain list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to the inode record's
 * orphan_extents list (struct inode_record::orphan_extents).
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;   /* printed as "inode" by print_orphan_data_extents() */
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
/*
 * Back reference to a tree block.  Embeds the common extent_backref header
 * as `node`; to_tree_backref() converts from that header back to this type.
 * `parent` and `root` share storage through the anonymous union.
 */
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
168 /* Explicit initialization for extent_record::flag_block_full_backref */
/*
 * That member is a two-bit field, so besides 0 and 1 it can also hold this
 * third value.  Presumably it means "not determined yet" -- TODO confirm at
 * the users.
 */
169 enum { FLAG_UNSET = 2 };
170
/*
 * Per-extent bookkeeping record.  It is linked into other structures through
 * three list heads and a cache_extent; to_extent_record() recovers the
 * record from its `list` member.
 *
 * flag_block_full_backref is two bits wide so that it can carry FLAG_UNSET
 * (2) in addition to 0 and 1.
 *
 * NOTE(review): nothing in this part of the file reads or writes the
 * remaining members; their exact semantics must be taken from the users.
 */
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * List-linked snapshot of values describing one tree root.
 *
 * NOTE(review): this struct is not used in this part of the file; the
 * members presumably mirror fields of the on-disk root item (bytenr, level,
 * last_snapshot, drop progress) -- confirm where it is filled in.
 */
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/*
 * Reference error bits, one bit each so that several can be OR-ed together.
 * print_ref_error() decodes a value built from these into text.
 */
233 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
235 #define REF_ERR_NO_INODE_REF            (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
238 #define REF_ERR_DUP_INODE_REF           (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
242 #define REF_ERR_NO_ROOT_REF             (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * One hole in a file's extent coverage, kept in an rb-tree
 * (struct inode_record::holes).  The tree is ordered by `start`
 * (see compare_hole()); the hole covers [start, start + len).
 */
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
/*
 * Everything collected about one inode while walking a tree.
 *
 * Records are reference counted and copy-on-modify: get_inode_rec() clones
 * a record (clone_inode_rec()) when it is asked for a modifiable one and
 * refs > 1, and free_inode_rec() only releases memory when refs drops to 0.
 */
253 struct inode_record {
254         struct list_head backrefs; /* list of struct inode_backref */
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors; /* I_ERR_* bits, decoded by print_inode_error() */
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link; /* preset to 1 for BTRFS_FREE_INO_OBJECTID in get_inode_rec() */
272         u64 found_size;
273         u64 extent_start; /* initialised to (u64)-1 for a new record */
274         u64 extent_end;
275         struct rb_root holes; /* tree of struct file_extent_hole */
276         struct list_head orphan_extents; /* list of struct orphan_data_extent */
277
278         u32 refs; /* number of holders sharing this record */
279 };
280
/*
 * Bits for struct inode_record::errors, one bit each so that several can be
 * OR-ed together.  print_inode_error() decodes them into text.
 */
281 #define I_ERR_NO_INODE_ITEM             (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
290 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
292 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/*
 * Per-root bookkeeping record, indexed through `cache` and carrying a list
 * of references to the root.
 *
 * NOTE(review): `backrefs` presumably holds struct root_backref entries
 * (see to_root_backref()); not confirmed by this part of the file.
 */
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
/*
 * Generic cache-tree entry that carries an opaque pointer.
 * get_inode_rec() uses it to index inode records: cache.start is the inode
 * number, cache.size is 1 and `data` points to the struct inode_record.
 */
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
/*
 * Cache-tree entry holding its own root and inode caches plus a pointer to
 * an inode record and a reference count.
 *
 * NOTE(review): not used in this part of the file; presumably tracks a tree
 * block that is shared between roots during the fs-tree walk -- confirm.
 */
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
/*
 * A (start, size) pair describing one block.
 * NOTE(review): units are presumably bytes -- not shown in this part of the
 * file; confirm at the users.
 */
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
/*
 * State of a tree walk: a cache tree of shared nodes and one shared_node
 * slot per tree level (BTRFS_MAX_LEVEL slots).
 *
 * NOTE(review): `active_node` is presumably the index into nodes[] that is
 * currently in use and `root_level` the level of the root being walked --
 * confirm at the users.
 */
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
/*
 * List entry naming one item by key and by the id of the root it was found
 * in.
 * NOTE(review): presumably queued on the file-scope `delete_items` list --
 * confirm at the users.
 */
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
/*
 * List entry describing one candidate (bytenr, bytes) range with a counter
 * and a `broken` marker.
 * NOTE(review): not used in this part of the file; what `count` counts must
 * be confirmed at the users.
 */
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
/*
 * Cache-tree entry with summary information about one root.
 * NOTE(review): presumably stored in the file-scope `roots_info_cache` --
 * confirm at the users.
 */
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit so that OR-ed results stay decodable.
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable; it now has bit 9.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Terminate the progress line written by print_status_check() by emitting
 * a newline on stdout and flushing it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);
	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
/* Forward declaration; the definition is outside the part of the file reviewed here. */
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 /*
1872  * Returns >0  Found error, not fatal, should continue
1873  * Returns <0  Fatal error, must exit the whole check
1874  * Returns 0   No errors found
1875  */
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877                                struct node_refs *nrefs, int *level, int ext_ref)
1878 {
1879         struct extent_buffer *cur = path->nodes[0];
1880         struct btrfs_key key;
1881         u64 cur_bytenr;
1882         u32 nritems;
1883         u64 first_ino = 0;
1884         int root_level = btrfs_header_level(root->node);
1885         int i;
1886         int ret = 0; /* Final return value */
1887         int err = 0; /* Positive error bitmap */
1888
1889         cur_bytenr = cur->start;
1890
1891         /* skip to first inode item or the first inode number change */
1892         nritems = btrfs_header_nritems(cur);
1893         for (i = 0; i < nritems; i++) {
1894                 btrfs_item_key_to_cpu(cur, &key, i);
1895                 if (i == 0)
1896                         first_ino = key.objectid;
1897                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898                     (first_ino && first_ino != key.objectid))
1899                         break;
1900         }
1901         if (i == nritems) {
1902                 path->slots[0] = nritems;
1903                 return 0;
1904         }
1905         path->slots[0] = i;
1906
1907 again:
1908         err |= check_inode_item(root, path, ext_ref);
1909
1910         if (err & LAST_ITEM)
1911                 goto out;
1912
1913         /* still have inode items in thie leaf */
1914         if (cur->start == cur_bytenr)
1915                 goto again;
1916
1917         /*
1918          * we have switched to another leaf, above nodes may
1919          * have changed, here walk down the path, if a node
1920          * or leaf is shared, check whether we can skip this
1921          * node or leaf.
1922          */
1923         for (i = root_level; i >= 0; i--) {
1924                 if (path->nodes[i]->start == nrefs->bytenr[i])
1925                         continue;
1926
1927                 ret = update_nodes_refs(root,
1928                                 path->nodes[i]->start,
1929                                 nrefs, i);
1930                 if (ret)
1931                         goto out;
1932
1933                 if (!nrefs->need_check[i]) {
1934                         *level += 1;
1935                         break;
1936                 }
1937         }
1938
1939         for (i = 0; i < *level; i++) {
1940                 free_extent_buffer(path->nodes[i]);
1941                 path->nodes[i] = NULL;
1942         }
1943 out:
1944         err &= ~LAST_ITEM;
1945         if (err && !ret)
1946                 ret = err;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         free_extent_buffer(next);
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214                             unsigned int ext_ref);
2215
2216 /*
2217  * Returns >0  Found error, should continue
2218  * Returns <0  Fatal error, must exit the whole check
2219  * Returns 0   No errors found
2220  */
2221 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2222                              int *level, struct node_refs *nrefs, int ext_ref)
2223 {
2224         enum btrfs_tree_block_status status;
2225         u64 bytenr;
2226         u64 ptr_gen;
2227         struct extent_buffer *next;
2228         struct extent_buffer *cur;
2229         u32 blocksize;
2230         int ret;
2231
2232         WARN_ON(*level < 0);
2233         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2234
2235         ret = update_nodes_refs(root, path->nodes[*level]->start,
2236                                 nrefs, *level);
2237         if (ret < 0)
2238                 return ret;
2239
2240         while (*level >= 0) {
2241                 WARN_ON(*level < 0);
2242                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243                 cur = path->nodes[*level];
2244
2245                 if (btrfs_header_level(cur) != *level)
2246                         WARN_ON(1);
2247
2248                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2249                         break;
2250                 /* Don't forgot to check leaf/node validation */
2251                 if (*level == 0) {
2252                         ret = btrfs_check_leaf(root, NULL, cur);
2253                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2254                                 ret = -EIO;
2255                                 break;
2256                         }
2257                         ret = process_one_leaf_v2(root, path, nrefs,
2258                                                   level, ext_ref);
2259                         break;
2260                 } else {
2261                         ret = btrfs_check_node(root, NULL, cur);
2262                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2263                                 ret = -EIO;
2264                                 break;
2265                         }
2266                 }
2267                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2268                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2269                 blocksize = root->nodesize;
2270
2271                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2272                 if (ret)
2273                         break;
2274                 if (!nrefs->need_check[*level - 1]) {
2275                         path->slots[*level]++;
2276                         continue;
2277                 }
2278
2279                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2280                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281                         free_extent_buffer(next);
2282                         reada_walk_down(root, cur, path->slots[*level]);
2283                         next = read_tree_block(root, bytenr, blocksize,
2284                                                ptr_gen);
2285                         if (!extent_buffer_uptodate(next)) {
2286                                 struct btrfs_key node_key;
2287
2288                                 btrfs_node_key_to_cpu(path->nodes[*level],
2289                                                       &node_key,
2290                                                       path->slots[*level]);
2291                                 btrfs_add_corrupt_extent_record(root->fs_info,
2292                                                 &node_key,
2293                                                 path->nodes[*level]->start,
2294                                                 root->nodesize, *level);
2295                                 ret = -EIO;
2296                                 break;
2297                         }
2298                 }
2299
2300                 ret = check_child_node(cur, path->slots[*level], next);
2301                 if (ret < 0) 
2302                         break;
2303
2304                 if (btrfs_is_leaf(next))
2305                         status = btrfs_check_leaf(root, NULL, next);
2306                 else
2307                         status = btrfs_check_node(root, NULL, next);
2308                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309                         free_extent_buffer(next);
2310                         ret = -EIO;
2311                         break;
2312                 }
2313
2314                 *level = *level - 1;
2315                 free_extent_buffer(path->nodes[*level]);
2316                 path->nodes[*level] = next;
2317                 path->slots[*level] = 0;
2318         }
2319         return ret;
2320 }
2321
2322 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2323                         struct walk_control *wc, int *level)
2324 {
2325         int i;
2326         struct extent_buffer *leaf;
2327
2328         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2329                 leaf = path->nodes[i];
2330                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2331                         path->slots[i]++;
2332                         *level = i;
2333                         return 0;
2334                 } else {
2335                         free_extent_buffer(path->nodes[*level]);
2336                         path->nodes[*level] = NULL;
2337                         BUG_ON(*level > wc->active_node);
2338                         if (*level == wc->active_node)
2339                                 leave_shared_node(root, wc, *level);
2340                         *level = i + 1;
2341                 }
2342         }
2343         return 1;
2344 }
2345
2346 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2347                            int *level)
2348 {
2349         int i;
2350         struct extent_buffer *leaf;
2351
2352         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2353                 leaf = path->nodes[i];
2354                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2355                         path->slots[i]++;
2356                         *level = i;
2357                         return 0;
2358                 } else {
2359                         free_extent_buffer(path->nodes[*level]);
2360                         path->nodes[*level] = NULL;
2361                         *level = i + 1;
2362                 }
2363         }
2364         return 1;
2365 }
2366
2367 static int check_root_dir(struct inode_record *rec)
2368 {
2369         struct inode_backref *backref;
2370         int ret = -1;
2371
2372         if (!rec->found_inode_item || rec->errors)
2373                 goto out;
2374         if (rec->nlink != 1 || rec->found_link != 0)
2375                 goto out;
2376         if (list_empty(&rec->backrefs))
2377                 goto out;
2378         backref = to_inode_backref(rec->backrefs.next);
2379         if (!backref->found_inode_ref)
2380                 goto out;
2381         if (backref->index != 0 || backref->namelen != 2 ||
2382             memcmp(backref->name, "..", 2))
2383                 goto out;
2384         if (backref->found_dir_index || backref->found_dir_item)
2385                 goto out;
2386         ret = 0;
2387 out:
2388         return ret;
2389 }
2390
2391 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2392                               struct btrfs_root *root, struct btrfs_path *path,
2393                               struct inode_record *rec)
2394 {
2395         struct btrfs_inode_item *ei;
2396         struct btrfs_key key;
2397         int ret;
2398
2399         key.objectid = rec->ino;
2400         key.type = BTRFS_INODE_ITEM_KEY;
2401         key.offset = (u64)-1;
2402
2403         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2404         if (ret < 0)
2405                 goto out;
2406         if (ret) {
2407                 if (!path->slots[0]) {
2408                         ret = -ENOENT;
2409                         goto out;
2410                 }
2411                 path->slots[0]--;
2412                 ret = 0;
2413         }
2414         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2415         if (key.objectid != rec->ino) {
2416                 ret = -ENOENT;
2417                 goto out;
2418         }
2419
2420         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2421                             struct btrfs_inode_item);
2422         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2423         btrfs_mark_buffer_dirty(path->nodes[0]);
2424         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2425         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2426                root->root_key.objectid);
2427 out:
2428         btrfs_release_path(path);
2429         return ret;
2430 }
2431
2432 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2433                                     struct btrfs_root *root,
2434                                     struct btrfs_path *path,
2435                                     struct inode_record *rec)
2436 {
2437         int ret;
2438
2439         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2440         btrfs_release_path(path);
2441         if (!ret)
2442                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2443         return ret;
2444 }
2445
2446 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2447                                struct btrfs_root *root,
2448                                struct btrfs_path *path,
2449                                struct inode_record *rec)
2450 {
2451         struct btrfs_inode_item *ei;
2452         struct btrfs_key key;
2453         int ret = 0;
2454
2455         key.objectid = rec->ino;
2456         key.type = BTRFS_INODE_ITEM_KEY;
2457         key.offset = 0;
2458
2459         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2460         if (ret) {
2461                 if (ret > 0)
2462                         ret = -ENOENT;
2463                 goto out;
2464         }
2465
2466         /* Since ret == 0, no need to check anything */
2467         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2468                             struct btrfs_inode_item);
2469         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2470         btrfs_mark_buffer_dirty(path->nodes[0]);
2471         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472         printf("reset nbytes for ino %llu root %llu\n",
2473                rec->ino, root->root_key.objectid);
2474 out:
2475         btrfs_release_path(path);
2476         return ret;
2477 }
2478
2479 static int add_missing_dir_index(struct btrfs_root *root,
2480                                  struct cache_tree *inode_cache,
2481                                  struct inode_record *rec,
2482                                  struct inode_backref *backref)
2483 {
2484         struct btrfs_path path;
2485         struct btrfs_trans_handle *trans;
2486         struct btrfs_dir_item *dir_item;
2487         struct extent_buffer *leaf;
2488         struct btrfs_key key;
2489         struct btrfs_disk_key disk_key;
2490         struct inode_record *dir_rec;
2491         unsigned long name_ptr;
2492         u32 data_size = sizeof(*dir_item) + backref->namelen;
2493         int ret;
2494
2495         trans = btrfs_start_transaction(root, 1);
2496         if (IS_ERR(trans))
2497                 return PTR_ERR(trans);
2498
2499         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2500                 (unsigned long long)rec->ino);
2501
2502         btrfs_init_path(&path);
2503         key.objectid = backref->dir;
2504         key.type = BTRFS_DIR_INDEX_KEY;
2505         key.offset = backref->index;
2506         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2507         BUG_ON(ret);
2508
2509         leaf = path.nodes[0];
2510         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2511
2512         disk_key.objectid = cpu_to_le64(rec->ino);
2513         disk_key.type = BTRFS_INODE_ITEM_KEY;
2514         disk_key.offset = 0;
2515
2516         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2517         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2518         btrfs_set_dir_data_len(leaf, dir_item, 0);
2519         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2520         name_ptr = (unsigned long)(dir_item + 1);
2521         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2522         btrfs_mark_buffer_dirty(leaf);
2523         btrfs_release_path(&path);
2524         btrfs_commit_transaction(trans, root);
2525
2526         backref->found_dir_index = 1;
2527         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2528         BUG_ON(IS_ERR(dir_rec));
2529         if (!dir_rec)
2530                 return 0;
2531         dir_rec->found_size += backref->namelen;
2532         if (dir_rec->found_size == dir_rec->isize &&
2533             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2534                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2535         if (dir_rec->found_size != dir_rec->isize)
2536                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2537
2538         return 0;
2539 }
2540
2541 static int delete_dir_index(struct btrfs_root *root,
2542                             struct inode_backref *backref)
2543 {
2544         struct btrfs_trans_handle *trans;
2545         struct btrfs_dir_item *di;
2546         struct btrfs_path path;
2547         int ret = 0;
2548
2549         trans = btrfs_start_transaction(root, 1);
2550         if (IS_ERR(trans))
2551                 return PTR_ERR(trans);
2552
2553         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2554                 (unsigned long long)backref->dir,
2555                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2556                 (unsigned long long)root->objectid);
2557
2558         btrfs_init_path(&path);
2559         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2560                                     backref->name, backref->namelen,
2561                                     backref->index, -1);
2562         if (IS_ERR(di)) {
2563                 ret = PTR_ERR(di);
2564                 btrfs_release_path(&path);
2565                 btrfs_commit_transaction(trans, root);
2566                 if (ret == -ENOENT)
2567                         return 0;
2568                 return ret;
2569         }
2570
2571         if (!di)
2572                 ret = btrfs_del_item(trans, root, &path);
2573         else
2574                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2575         BUG_ON(ret);
2576         btrfs_release_path(&path);
2577         btrfs_commit_transaction(trans, root);
2578         return ret;
2579 }
2580
2581 static int create_inode_item(struct btrfs_root *root,
2582                              struct inode_record *rec,
2583                              int root_dir)
2584 {
2585         struct btrfs_trans_handle *trans;
2586         struct btrfs_inode_item inode_item;
2587         time_t now = time(NULL);
2588         int ret;
2589
2590         trans = btrfs_start_transaction(root, 1);
2591         if (IS_ERR(trans)) {
2592                 ret = PTR_ERR(trans);
2593                 return ret;
2594         }
2595
2596         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2597                 "be incomplete, please check permissions and content after "
2598                 "the fsck completes.\n", (unsigned long long)root->objectid,
2599                 (unsigned long long)rec->ino);
2600
2601         memset(&inode_item, 0, sizeof(inode_item));
2602         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2603         if (root_dir)
2604                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2605         else
2606                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2607         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2608         if (rec->found_dir_item) {
2609                 if (rec->found_file_extent)
2610                         fprintf(stderr, "root %llu inode %llu has both a dir "
2611                                 "item and extents, unsure if it is a dir or a "
2612                                 "regular file so setting it as a directory\n",
2613                                 (unsigned long long)root->objectid,
2614                                 (unsigned long long)rec->ino);
2615                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2616                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2617         } else if (!rec->found_dir_item) {
2618                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2619                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2620         }
2621         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2622         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2623         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2624         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2625         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2626         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2627         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2628         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2629
2630         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2631         BUG_ON(ret);
2632         btrfs_commit_transaction(trans, root);
2633         return 0;
2634 }
2635
2636 static int repair_inode_backrefs(struct btrfs_root *root,
2637                                  struct inode_record *rec,
2638                                  struct cache_tree *inode_cache,
2639                                  int delete)
2640 {
2641         struct inode_backref *tmp, *backref;
2642         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2643         int ret = 0;
2644         int repaired = 0;
2645
2646         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2647                 if (!delete && rec->ino == root_dirid) {
2648                         if (!rec->found_inode_item) {
2649                                 ret = create_inode_item(root, rec, 1);
2650                                 if (ret)
2651                                         break;
2652                                 repaired++;
2653                         }
2654                 }
2655
2656                 /* Index 0 for root dir's are special, don't mess with it */
2657                 if (rec->ino == root_dirid && backref->index == 0)
2658                         continue;
2659
2660                 if (delete &&
2661                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2662                      (backref->found_dir_index && backref->found_inode_ref &&
2663                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2664                         ret = delete_dir_index(root, backref);
2665                         if (ret)
2666                                 break;
2667                         repaired++;
2668                         list_del(&backref->list);
2669                         free(backref);
2670                 }
2671
2672                 if (!delete && !backref->found_dir_index &&
2673                     backref->found_dir_item && backref->found_inode_ref) {
2674                         ret = add_missing_dir_index(root, inode_cache, rec,
2675                                                     backref);
2676                         if (ret)
2677                                 break;
2678                         repaired++;
2679                         if (backref->found_dir_item &&
2680                             backref->found_dir_index &&
2681                             backref->found_dir_index) {
2682                                 if (!backref->errors &&
2683                                     backref->found_inode_ref) {
2684                                         list_del(&backref->list);
2685                                         free(backref);
2686                                 }
2687                         }
2688                 }
2689
2690                 if (!delete && (!backref->found_dir_index &&
2691                                 !backref->found_dir_item &&
2692                                 backref->found_inode_ref)) {
2693                         struct btrfs_trans_handle *trans;
2694                         struct btrfs_key location;
2695
2696                         ret = check_dir_conflict(root, backref->name,
2697                                                  backref->namelen,
2698                                                  backref->dir,
2699                                                  backref->index);
2700                         if (ret) {
2701                                 /*
2702                                  * let nlink fixing routine to handle it,
2703                                  * which can do it better.
2704                                  */
2705                                 ret = 0;
2706                                 break;
2707                         }
2708                         location.objectid = rec->ino;
2709                         location.type = BTRFS_INODE_ITEM_KEY;
2710                         location.offset = 0;
2711
2712                         trans = btrfs_start_transaction(root, 1);
2713                         if (IS_ERR(trans)) {
2714                                 ret = PTR_ERR(trans);
2715                                 break;
2716                         }
2717                         fprintf(stderr, "adding missing dir index/item pair "
2718                                 "for inode %llu\n",
2719                                 (unsigned long long)rec->ino);
2720                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2721                                                     backref->namelen,
2722                                                     backref->dir, &location,
2723                                                     imode_to_type(rec->imode),
2724                                                     backref->index);
2725                         BUG_ON(ret);
2726                         btrfs_commit_transaction(trans, root);
2727                         repaired++;
2728                 }
2729
2730                 if (!delete && (backref->found_inode_ref &&
2731                                 backref->found_dir_index &&
2732                                 backref->found_dir_item &&
2733                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2734                                 !rec->found_inode_item)) {
2735                         ret = create_inode_item(root, rec, 0);
2736                         if (ret)
2737                                 break;
2738                         repaired++;
2739                 }
2740
2741         }
2742         return ret ? ret : repaired;
2743 }
2744
2745 /*
2746  * To determine the file type for nlink/inode_item repair
2747  *
2748  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2749  * Return -ENOENT if file type is not found.
2750  */
2751 static int find_file_type(struct inode_record *rec, u8 *type)
2752 {
2753         struct inode_backref *backref;
2754
2755         /* For inode item recovered case */
2756         if (rec->found_inode_item) {
2757                 *type = imode_to_type(rec->imode);
2758                 return 0;
2759         }
2760
2761         list_for_each_entry(backref, &rec->backrefs, list) {
2762                 if (backref->found_dir_index || backref->found_dir_item) {
2763                         *type = backref->filetype;
2764                         return 0;
2765                 }
2766         }
2767         return -ENOENT;
2768 }
2769
2770 /*
2771  * To determine the file name for nlink repair
2772  *
2773  * Return 0 if file name is found, set name and namelen.
2774  * Return -ENOENT if file name is not found.
2775  */
2776 static int find_file_name(struct inode_record *rec,
2777                           char *name, int *namelen)
2778 {
2779         struct inode_backref *backref;
2780
2781         list_for_each_entry(backref, &rec->backrefs, list) {
2782                 if (backref->found_dir_index || backref->found_dir_item ||
2783                     backref->found_inode_ref) {
2784                         memcpy(name, backref->name, backref->namelen);
2785                         *namelen = backref->namelen;
2786                         return 0;
2787                 }
2788         }
2789         return -ENOENT;
2790 }
2791
2792 /* Reset the nlink of the inode to the correct one */
2793 static int reset_nlink(struct btrfs_trans_handle *trans,
2794                        struct btrfs_root *root,
2795                        struct btrfs_path *path,
2796                        struct inode_record *rec)
2797 {
2798         struct inode_backref *backref;
2799         struct inode_backref *tmp;
2800         struct btrfs_key key;
2801         struct btrfs_inode_item *inode_item;
2802         int ret = 0;
2803
2804         /* We don't believe this either, reset it and iterate backref */
2805         rec->found_link = 0;
2806
2807         /* Remove all backref including the valid ones */
2808         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2809                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2810                                    backref->index, backref->name,
2811                                    backref->namelen, 0);
2812                 if (ret < 0)
2813                         goto out;
2814
2815                 /* remove invalid backref, so it won't be added back */
2816                 if (!(backref->found_dir_index &&
2817                       backref->found_dir_item &&
2818                       backref->found_inode_ref)) {
2819                         list_del(&backref->list);
2820                         free(backref);
2821                 } else {
2822                         rec->found_link++;
2823                 }
2824         }
2825
2826         /* Set nlink to 0 */
2827         key.objectid = rec->ino;
2828         key.type = BTRFS_INODE_ITEM_KEY;
2829         key.offset = 0;
2830         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2831         if (ret < 0)
2832                 goto out;
2833         if (ret > 0) {
2834                 ret = -ENOENT;
2835                 goto out;
2836         }
2837         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2838                                     struct btrfs_inode_item);
2839         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2840         btrfs_mark_buffer_dirty(path->nodes[0]);
2841         btrfs_release_path(path);
2842
2843         /*
2844          * Add back valid inode_ref/dir_item/dir_index,
2845          * add_link() will handle the nlink inc, so new nlink must be correct
2846          */
2847         list_for_each_entry(backref, &rec->backrefs, list) {
2848                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2849                                      backref->name, backref->namelen,
2850                                      backref->filetype, &backref->index, 1);
2851                 if (ret < 0)
2852                         goto out;
2853         }
2854 out:
2855         btrfs_release_path(path);
2856         return ret;
2857 }
2858
2859 static int get_highest_inode(struct btrfs_trans_handle *trans,
2860                                 struct btrfs_root *root,
2861                                 struct btrfs_path *path,
2862                                 u64 *highest_ino)
2863 {
2864         struct btrfs_key key, found_key;
2865         int ret;
2866
2867         btrfs_init_path(path);
2868         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2869         key.offset = -1;
2870         key.type = BTRFS_INODE_ITEM_KEY;
2871         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2872         if (ret == 1) {
2873                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2874                                 path->slots[0] - 1);
2875                 *highest_ino = found_key.objectid;
2876                 ret = 0;
2877         }
2878         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2879                 ret = -EOVERFLOW;
2880         btrfs_release_path(path);
2881         return ret;
2882 }
2883
2884 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2885                                struct btrfs_root *root,
2886                                struct btrfs_path *path,
2887                                struct inode_record *rec)
2888 {
2889         char *dir_name = "lost+found";
2890         char namebuf[BTRFS_NAME_LEN] = {0};
2891         u64 lost_found_ino;
2892         u32 mode = 0700;
2893         u8 type = 0;
2894         int namelen = 0;
2895         int name_recovered = 0;
2896         int type_recovered = 0;
2897         int ret = 0;
2898
2899         /*
2900          * Get file name and type first before these invalid inode ref
2901          * are deleted by remove_all_invalid_backref()
2902          */
2903         name_recovered = !find_file_name(rec, namebuf, &namelen);
2904         type_recovered = !find_file_type(rec, &type);
2905
2906         if (!name_recovered) {
2907                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2908                        rec->ino, rec->ino);
2909                 namelen = count_digits(rec->ino);
2910                 sprintf(namebuf, "%llu", rec->ino);
2911                 name_recovered = 1;
2912         }
2913         if (!type_recovered) {
2914                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2915                        rec->ino);
2916                 type = BTRFS_FT_REG_FILE;
2917                 type_recovered = 1;
2918         }
2919
2920         ret = reset_nlink(trans, root, path, rec);
2921         if (ret < 0) {
2922                 fprintf(stderr,
2923                         "Failed to reset nlink for inode %llu: %s\n",
2924                         rec->ino, strerror(-ret));
2925                 goto out;
2926         }
2927
2928         if (rec->found_link == 0) {
2929                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2930                 if (ret < 0)
2931                         goto out;
2932                 lost_found_ino++;
2933                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2934                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2935                                   mode);
2936                 if (ret < 0) {
2937                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2938                                 dir_name, strerror(-ret));
2939                         goto out;
2940                 }
2941                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2942                                      namebuf, namelen, type, NULL, 1);
2943                 /*
2944                  * Add ".INO" suffix several times to handle case where
2945                  * "FILENAME.INO" is already taken by another file.
2946                  */
2947                 while (ret == -EEXIST) {
2948                         /*
2949                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2950                          */
2951                         if (namelen + count_digits(rec->ino) + 1 >
2952                             BTRFS_NAME_LEN) {
2953                                 ret = -EFBIG;
2954                                 goto out;
2955                         }
2956                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2957                                  ".%llu", rec->ino);
2958                         namelen += count_digits(rec->ino) + 1;
2959                         ret = btrfs_add_link(trans, root, rec->ino,
2960                                              lost_found_ino, namebuf,
2961                                              namelen, type, NULL, 1);
2962                 }
2963                 if (ret < 0) {
2964                         fprintf(stderr,
2965                                 "Failed to link the inode %llu to %s dir: %s\n",
2966                                 rec->ino, dir_name, strerror(-ret));
2967                         goto out;
2968                 }
2969                 /*
2970                  * Just increase the found_link, don't actually add the
2971                  * backref. This will make things easier and this inode
2972                  * record will be freed after the repair is done.
2973                  * So fsck will not report problem about this inode.
2974                  */
2975                 rec->found_link++;
2976                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2977                        namelen, namebuf, dir_name);
2978         }
2979         printf("Fixed the nlink of inode %llu\n", rec->ino);
2980 out:
2981         /*
2982          * Clear the flag anyway, or we will loop forever for the same inode
2983          * as it will not be removed from the bad inode list and the dead loop
2984          * happens.
2985          */
2986         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2987         btrfs_release_path(path);
2988         return ret;
2989 }
2990
2991 /*
2992  * Check if there is any normal(reg or prealloc) file extent for given
2993  * ino.
2994  * This is used to determine the file type when neither its dir_index/item or
2995  * inode_item exists.
2996  *
2997  * This will *NOT* report error, if any error happens, just consider it does
2998  * not have any normal file extent.
2999  */
3000 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3001 {
3002         struct btrfs_path path;
3003         struct btrfs_key key;
3004         struct btrfs_key found_key;
3005         struct btrfs_file_extent_item *fi;
3006         u8 type;
3007         int ret = 0;
3008
3009         btrfs_init_path(&path);
3010         key.objectid = ino;
3011         key.type = BTRFS_EXTENT_DATA_KEY;
3012         key.offset = 0;
3013
3014         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3015         if (ret < 0) {
3016                 ret = 0;
3017                 goto out;
3018         }
3019         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3020                 ret = btrfs_next_leaf(root, &path);
3021                 if (ret) {
3022                         ret = 0;
3023                         goto out;
3024                 }
3025         }
3026         while (1) {
3027                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3028                                       path.slots[0]);
3029                 if (found_key.objectid != ino ||
3030                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3031                         break;
3032                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3033                                     struct btrfs_file_extent_item);
3034                 type = btrfs_file_extent_type(path.nodes[0], fi);
3035                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3036                         ret = 1;
3037                         goto out;
3038                 }
3039         }
3040 out:
3041         btrfs_release_path(&path);
3042         return ret;
3043 }
3044
3045 static u32 btrfs_type_to_imode(u8 type)
3046 {
3047         static u32 imode_by_btrfs_type[] = {
3048                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3049                 [BTRFS_FT_DIR]          = S_IFDIR,
3050                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3051                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3052                 [BTRFS_FT_FIFO]         = S_IFIFO,
3053                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3054                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3055         };
3056
3057         return imode_by_btrfs_type[(type)];
3058 }
3059
3060 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3061                                 struct btrfs_root *root,
3062                                 struct btrfs_path *path,
3063                                 struct inode_record *rec)
3064 {
3065         u8 filetype;
3066         u32 mode = 0700;
3067         int type_recovered = 0;
3068         int ret = 0;
3069
3070         printf("Trying to rebuild inode:%llu\n", rec->ino);
3071
3072         type_recovered = !find_file_type(rec, &filetype);
3073
3074         /*
3075          * Try to determine inode type if type not found.
3076          *
3077          * For found regular file extent, it must be FILE.
3078          * For found dir_item/index, it must be DIR.
3079          *
3080          * For undetermined one, use FILE as fallback.
3081          *
3082          * TODO:
3083          * 1. If found backref(inode_index/item is already handled) to it,
3084          *    it must be DIR.
3085          *    Need new inode-inode ref structure to allow search for that.
3086          */
3087         if (!type_recovered) {
3088                 if (rec->found_file_extent &&
3089                     find_normal_file_extent(root, rec->ino)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else if (rec->found_dir_item) {
3093                         type_recovered = 1;
3094                         filetype = BTRFS_FT_DIR;
3095                 } else if (!list_empty(&rec->orphan_extents)) {
3096                         type_recovered = 1;
3097                         filetype = BTRFS_FT_REG_FILE;
3098                 } else{
3099                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3100                                rec->ino);
3101                         type_recovered = 1;
3102                         filetype = BTRFS_FT_REG_FILE;
3103                 }
3104         }
3105
3106         ret = btrfs_new_inode(trans, root, rec->ino,
3107                               mode | btrfs_type_to_imode(filetype));
3108         if (ret < 0)
3109                 goto out;
3110
3111         /*
3112          * Here inode rebuild is done, we only rebuild the inode item,
3113          * don't repair the nlink(like move to lost+found).
3114          * That is the job of nlink repair.
3115          *
3116          * We just fill the record and return
3117          */
3118         rec->found_dir_item = 1;
3119         rec->imode = mode | btrfs_type_to_imode(filetype);
3120         rec->nlink = 0;
3121         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3122         /* Ensure the inode_nlinks repair function will be called */
3123         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3124 out:
3125         return ret;
3126 }
3127
3128 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3129                                       struct btrfs_root *root,
3130                                       struct btrfs_path *path,
3131                                       struct inode_record *rec)
3132 {
3133         struct orphan_data_extent *orphan;
3134         struct orphan_data_extent *tmp;
3135         int ret = 0;
3136
3137         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3138                 /*
3139                  * Check for conflicting file extents
3140                  *
3141                  * Here we don't know whether the extents is compressed or not,
3142                  * so we can only assume it not compressed nor data offset,
3143                  * and use its disk_len as extent length.
3144                  */
3145                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3146                                        orphan->offset, orphan->disk_len, 0);
3147                 btrfs_release_path(path);
3148                 if (ret < 0)
3149                         goto out;
3150                 if (!ret) {
3151                         fprintf(stderr,
3152                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3153                                 orphan->disk_bytenr, orphan->disk_len);
3154                         ret = btrfs_free_extent(trans,
3155                                         root->fs_info->extent_root,
3156                                         orphan->disk_bytenr, orphan->disk_len,
3157                                         0, root->objectid, orphan->objectid,
3158                                         orphan->offset);
3159                         if (ret < 0)
3160                                 goto out;
3161                 }
3162                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3163                                 orphan->offset, orphan->disk_bytenr,
3164                                 orphan->disk_len, orphan->disk_len);
3165                 if (ret < 0)
3166                         goto out;
3167
3168                 /* Update file size info */
3169                 rec->found_size += orphan->disk_len;
3170                 if (rec->found_size == rec->nbytes)
3171                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3172
3173                 /* Update the file extent hole info too */
3174                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3175                                            orphan->disk_len);
3176                 if (ret < 0)
3177                         goto out;
3178                 if (RB_EMPTY_ROOT(&rec->holes))
3179                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3180
3181                 list_del(&orphan->list);
3182                 free(orphan);
3183         }
3184         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3185 out:
3186         return ret;
3187 }
3188
3189 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3190                                         struct btrfs_root *root,
3191                                         struct btrfs_path *path,
3192                                         struct inode_record *rec)
3193 {
3194         struct rb_node *node;
3195         struct file_extent_hole *hole;
3196         int found = 0;
3197         int ret = 0;
3198
3199         node = rb_first(&rec->holes);
3200
3201         while (node) {
3202                 found = 1;
3203                 hole = rb_entry(node, struct file_extent_hole, node);
3204                 ret = btrfs_punch_hole(trans, root, rec->ino,
3205                                        hole->start, hole->len);
3206                 if (ret < 0)
3207                         goto out;
3208                 ret = del_file_extent_hole(&rec->holes, hole->start,
3209                                            hole->len);
3210                 if (ret < 0)
3211                         goto out;
3212                 if (RB_EMPTY_ROOT(&rec->holes))
3213                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3214                 node = rb_first(&rec->holes);
3215         }
3216         /* special case for a file losing all its file extent */
3217         if (!found) {
3218                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3219                                        round_up(rec->isize, root->sectorsize));
3220                 if (ret < 0)
3221                         goto out;
3222         }
3223         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3224                rec->ino, root->objectid);
3225 out:
3226         return ret;
3227 }
3228
3229 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3230 {
3231         struct btrfs_trans_handle *trans;
3232         struct btrfs_path path;
3233         int ret = 0;
3234
3235         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3236                              I_ERR_NO_ORPHAN_ITEM |
3237                              I_ERR_LINK_COUNT_WRONG |
3238                              I_ERR_NO_INODE_ITEM |
3239                              I_ERR_FILE_EXTENT_ORPHAN |
3240                              I_ERR_FILE_EXTENT_DISCOUNT|
3241                              I_ERR_FILE_NBYTES_WRONG)))
3242                 return rec->errors;
3243
3244         /*
3245          * For nlink repair, it may create a dir and add link, so
3246          * 2 for parent(256)'s dir_index and dir_item
3247          * 2 for lost+found dir's inode_item and inode_ref
3248          * 1 for the new inode_ref of the file
3249          * 2 for lost+found dir's dir_index and dir_item for the file
3250          */
3251         trans = btrfs_start_transaction(root, 7);
3252         if (IS_ERR(trans))
3253                 return PTR_ERR(trans);
3254
3255         btrfs_init_path(&path);
3256         if (rec->errors & I_ERR_NO_INODE_ITEM)
3257                 ret = repair_inode_no_item(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3259                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3261                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3263                 ret = repair_inode_isize(trans, root, &path, rec);
3264         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3265                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3266         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3267                 ret = repair_inode_nlinks(trans, root, &path, rec);
3268         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3269                 ret = repair_inode_nbytes(trans, root, &path, rec);
3270         btrfs_commit_transaction(trans, root);
3271         btrfs_release_path(&path);
3272         return ret;
3273 }
3274
3275 static int check_inode_recs(struct btrfs_root *root,
3276                             struct cache_tree *inode_cache)
3277 {
3278         struct cache_extent *cache;
3279         struct ptr_node *node;
3280         struct inode_record *rec;
3281         struct inode_backref *backref;
3282         int stage = 0;
3283         int ret = 0;
3284         int err = 0;
3285         u64 error = 0;
3286         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3287
3288         if (btrfs_root_refs(&root->root_item) == 0) {
3289                 if (!cache_tree_empty(inode_cache))
3290                         fprintf(stderr, "warning line %d\n", __LINE__);
3291                 return 0;
3292         }
3293
3294         /*
3295          * We need to repair backrefs first because we could change some of the
3296          * errors in the inode recs.
3297          *
3298          * We also need to go through and delete invalid backrefs first and then
3299          * add the correct ones second.  We do this because we may get EEXIST
3300          * when adding back the correct index because we hadn't yet deleted the
3301          * invalid index.
3302          *
3303          * For example, if we were missing a dir index then the directories
3304          * isize would be wrong, so if we fixed the isize to what we thought it
3305          * would be and then fixed the backref we'd still have a invalid fs, so
3306          * we need to add back the dir index and then check to see if the isize
3307          * is still wrong.
3308          */
3309         while (stage < 3) {
3310                 stage++;
3311                 if (stage == 3 && !err)
3312                         break;
3313
3314                 cache = search_cache_extent(inode_cache, 0);
3315                 while (repair && cache) {
3316                         node = container_of(cache, struct ptr_node, cache);
3317                         rec = node->data;
3318                         cache = next_cache_extent(cache);
3319
3320                         /* Need to free everything up and rescan */
3321                         if (stage == 3) {
3322                                 remove_cache_extent(inode_cache, &node->cache);
3323                                 free(node);
3324                                 free_inode_rec(rec);
3325                                 continue;
3326                         }
3327
3328                         if (list_empty(&rec->backrefs))
3329                                 continue;
3330
3331                         ret = repair_inode_backrefs(root, rec, inode_cache,
3332                                                     stage == 1);
3333                         if (ret < 0) {
3334                                 err = ret;
3335                                 stage = 2;
3336                                 break;
3337                         } if (ret > 0) {
3338                                 err = -EAGAIN;
3339                         }
3340                 }
3341         }
3342         if (err)
3343                 return err;
3344
3345         rec = get_inode_rec(inode_cache, root_dirid, 0);
3346         BUG_ON(IS_ERR(rec));
3347         if (rec) {
3348                 ret = check_root_dir(rec);
3349                 if (ret) {
3350                         fprintf(stderr, "root %llu root dir %llu error\n",
3351                                 (unsigned long long)root->root_key.objectid,
3352                                 (unsigned long long)root_dirid);
3353                         print_inode_error(root, rec);
3354                         error++;
3355                 }
3356         } else {
3357                 if (repair) {
3358                         struct btrfs_trans_handle *trans;
3359
3360                         trans = btrfs_start_transaction(root, 1);
3361                         if (IS_ERR(trans)) {
3362                                 err = PTR_ERR(trans);
3363                                 return err;
3364                         }
3365
3366                         fprintf(stderr,
3367                                 "root %llu missing its root dir, recreating\n",
3368                                 (unsigned long long)root->objectid);
3369
3370                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3371                         BUG_ON(ret);
3372
3373                         btrfs_commit_transaction(trans, root);
3374                         return -EAGAIN;
3375                 }
3376
3377                 fprintf(stderr, "root %llu root dir %llu not found\n",
3378                         (unsigned long long)root->root_key.objectid,
3379                         (unsigned long long)root_dirid);
3380         }
3381
3382         while (1) {
3383                 cache = search_cache_extent(inode_cache, 0);
3384                 if (!cache)
3385                         break;
3386                 node = container_of(cache, struct ptr_node, cache);
3387                 rec = node->data;
3388                 remove_cache_extent(inode_cache, &node->cache);
3389                 free(node);
3390                 if (rec->ino == root_dirid ||
3391                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3392                         free_inode_rec(rec);
3393                         continue;
3394                 }
3395
3396                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3397                         ret = check_orphan_item(root, rec->ino);
3398                         if (ret == 0)
3399                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3400                         if (can_free_inode_rec(rec)) {
3401                                 free_inode_rec(rec);
3402                                 continue;
3403                         }
3404                 }
3405
3406                 if (!rec->found_inode_item)
3407                         rec->errors |= I_ERR_NO_INODE_ITEM;
3408                 if (rec->found_link != rec->nlink)
3409                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3410                 if (repair) {
3411                         ret = try_repair_inode(root, rec);
3412                         if (ret == 0 && can_free_inode_rec(rec)) {
3413                                 free_inode_rec(rec);
3414                                 continue;
3415                         }
3416                         ret = 0;
3417                 }
3418
3419                 if (!(repair && ret == 0))
3420                         error++;
3421                 print_inode_error(root, rec);
3422                 list_for_each_entry(backref, &rec->backrefs, list) {
3423                         if (!backref->found_dir_item)
3424                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3425                         if (!backref->found_dir_index)
3426                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3427                         if (!backref->found_inode_ref)
3428                                 backref->errors |= REF_ERR_NO_INODE_REF;
3429                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3430                                 " namelen %u name %s filetype %d errors %x",
3431                                 (unsigned long long)backref->dir,
3432                                 (unsigned long long)backref->index,
3433                                 backref->namelen, backref->name,
3434                                 backref->filetype, backref->errors);
3435                         print_ref_error(backref->errors);
3436                 }
3437                 free_inode_rec(rec);
3438         }
3439         return (error > 0) ? -1 : 0;
3440 }
3441
3442 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3443                                         u64 objectid)
3444 {
3445         struct cache_extent *cache;
3446         struct root_record *rec = NULL;
3447         int ret;
3448
3449         cache = lookup_cache_extent(root_cache, objectid, 1);
3450         if (cache) {
3451                 rec = container_of(cache, struct root_record, cache);
3452         } else {
3453                 rec = calloc(1, sizeof(*rec));
3454                 if (!rec)
3455                         return ERR_PTR(-ENOMEM);
3456                 rec->objectid = objectid;
3457                 INIT_LIST_HEAD(&rec->backrefs);
3458                 rec->cache.start = objectid;
3459                 rec->cache.size = 1;
3460
3461                 ret = insert_cache_extent(root_cache, &rec->cache);
3462                 if (ret)
3463                         return ERR_PTR(-EEXIST);
3464         }
3465         return rec;
3466 }
3467
3468 static struct root_backref *get_root_backref(struct root_record *rec,
3469                                              u64 ref_root, u64 dir, u64 index,
3470                                              const char *name, int namelen)
3471 {
3472         struct root_backref *backref;
3473
3474         list_for_each_entry(backref, &rec->backrefs, list) {
3475                 if (backref->ref_root != ref_root || backref->dir != dir ||
3476                     backref->namelen != namelen)
3477                         continue;
3478                 if (memcmp(name, backref->name, namelen))
3479                         continue;
3480                 return backref;
3481         }
3482
3483         backref = calloc(1, sizeof(*backref) + namelen + 1);
3484         if (!backref)
3485                 return NULL;
3486         backref->ref_root = ref_root;
3487         backref->dir = dir;
3488         backref->index = index;
3489         backref->namelen = namelen;
3490         memcpy(backref->name, name, namelen);
3491         backref->name[namelen] = '\0';
3492         list_add_tail(&backref->list, &rec->backrefs);
3493         return backref;
3494 }
3495
3496 static void free_root_record(struct cache_extent *cache)
3497 {
3498         struct root_record *rec;
3499         struct root_backref *backref;
3500
3501         rec = container_of(cache, struct root_record, cache);
3502         while (!list_empty(&rec->backrefs)) {
3503                 backref = to_root_backref(rec->backrefs.next);
3504                 list_del(&backref->list);
3505                 free(backref);
3506         }
3507
3508         free(rec);
3509 }
3510
3511 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3512
3513 static int add_root_backref(struct cache_tree *root_cache,
3514                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3515                             const char *name, int namelen,
3516                             int item_type, int errors)
3517 {
3518         struct root_record *rec;
3519         struct root_backref *backref;
3520
3521         rec = get_root_rec(root_cache, root_id);
3522         BUG_ON(IS_ERR(rec));
3523         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3524         BUG_ON(!backref);
3525
3526         backref->errors |= errors;
3527
3528         if (item_type != BTRFS_DIR_ITEM_KEY) {
3529                 if (backref->found_dir_index || backref->found_back_ref ||
3530                     backref->found_forward_ref) {
3531                         if (backref->index != index)
3532                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3533                 } else {
3534                         backref->index = index;
3535                 }
3536         }
3537
3538         if (item_type == BTRFS_DIR_ITEM_KEY) {
3539                 if (backref->found_forward_ref)
3540                         rec->found_ref++;
3541                 backref->found_dir_item = 1;
3542         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3543                 backref->found_dir_index = 1;
3544         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3545                 if (backref->found_forward_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3547                 else if (backref->found_dir_item)
3548                         rec->found_ref++;
3549                 backref->found_forward_ref = 1;
3550         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3551                 if (backref->found_back_ref)
3552                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3553                 backref->found_back_ref = 1;
3554         } else {
3555                 BUG_ON(1);
3556         }
3557
3558         if (backref->found_forward_ref && backref->found_dir_item)
3559                 backref->reachable = 1;
3560         return 0;
3561 }
3562
3563 static int merge_root_recs(struct btrfs_root *root,
3564                            struct cache_tree *src_cache,
3565                            struct cache_tree *dst_cache)
3566 {
3567         struct cache_extent *cache;
3568         struct ptr_node *node;
3569         struct inode_record *rec;
3570         struct inode_backref *backref;
3571         int ret = 0;
3572
3573         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3574                 free_inode_recs_tree(src_cache);
3575                 return 0;
3576         }
3577
3578         while (1) {
3579                 cache = search_cache_extent(src_cache, 0);
3580                 if (!cache)
3581                         break;
3582                 node = container_of(cache, struct ptr_node, cache);
3583                 rec = node->data;
3584                 remove_cache_extent(src_cache, &node->cache);
3585                 free(node);
3586
3587                 ret = is_child_root(root, root->objectid, rec->ino);
3588                 if (ret < 0)
3589                         break;
3590                 else if (ret == 0)
3591                         goto skip;
3592
3593                 list_for_each_entry(backref, &rec->backrefs, list) {
3594                         BUG_ON(backref->found_inode_ref);
3595                         if (backref->found_dir_item)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3600                                         backref->errors);
3601                         if (backref->found_dir_index)
3602                                 add_root_backref(dst_cache, rec->ino,
3603                                         root->root_key.objectid, backref->dir,
3604                                         backref->index, backref->name,
3605                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3606                                         backref->errors);
3607                 }
3608 skip:
3609                 free_inode_rec(rec);
3610         }
3611         if (ret < 0)
3612                 return ret;
3613         return 0;
3614 }
3615
3616 static int check_root_refs(struct btrfs_root *root,
3617                            struct cache_tree *root_cache)
3618 {
3619         struct root_record *rec;
3620         struct root_record *ref_root;
3621         struct root_backref *backref;
3622         struct cache_extent *cache;
3623         int loop = 1;
3624         int ret;
3625         int error;
3626         int errors = 0;
3627
3628         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3629         BUG_ON(IS_ERR(rec));
3630         rec->found_ref = 1;
3631
3632         /* fixme: this can not detect circular references */
3633         while (loop) {
3634                 loop = 0;
3635                 cache = search_cache_extent(root_cache, 0);
3636                 while (1) {
3637                         if (!cache)
3638                                 break;
3639                         rec = container_of(cache, struct root_record, cache);
3640                         cache = next_cache_extent(cache);
3641
3642                         if (rec->found_ref == 0)
3643                                 continue;
3644
3645                         list_for_each_entry(backref, &rec->backrefs, list) {
3646                                 if (!backref->reachable)
3647                                         continue;
3648
3649                                 ref_root = get_root_rec(root_cache,
3650                                                         backref->ref_root);
3651                                 BUG_ON(IS_ERR(ref_root));
3652                                 if (ref_root->found_ref > 0)
3653                                         continue;
3654
3655                                 backref->reachable = 0;
3656                                 rec->found_ref--;
3657                                 if (rec->found_ref == 0)
3658                                         loop = 1;
3659                         }
3660                 }
3661         }
3662
3663         cache = search_cache_extent(root_cache, 0);
3664         while (1) {
3665                 if (!cache)
3666                         break;
3667                 rec = container_of(cache, struct root_record, cache);
3668                 cache = next_cache_extent(cache);
3669
3670                 if (rec->found_ref == 0 &&
3671                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3672                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3673                         ret = check_orphan_item(root->fs_info->tree_root,
3674                                                 rec->objectid);
3675                         if (ret == 0)
3676                                 continue;
3677
3678                         /*
3679                          * If we don't have a root item then we likely just have
3680                          * a dir item in a snapshot for this root but no actual
3681                          * ref key or anything so it's meaningless.
3682                          */
3683                         if (!rec->found_root_item)
3684                                 continue;
3685                         errors++;
3686                         fprintf(stderr, "fs tree %llu not referenced\n",
3687                                 (unsigned long long)rec->objectid);
3688                 }
3689
3690                 error = 0;
3691                 if (rec->found_ref > 0 && !rec->found_root_item)
3692                         error = 1;
3693                 list_for_each_entry(backref, &rec->backrefs, list) {
3694                         if (!backref->found_dir_item)
3695                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3696                         if (!backref->found_dir_index)
3697                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3698                         if (!backref->found_back_ref)
3699                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3700                         if (!backref->found_forward_ref)
3701                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3702                         if (backref->reachable && backref->errors)
3703                                 error = 1;
3704                 }
3705                 if (!error)
3706                         continue;
3707
3708                 errors++;
3709                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3710                         (unsigned long long)rec->objectid, rec->found_ref,
3711                          rec->found_root_item ? "" : "not found");
3712
3713                 list_for_each_entry(backref, &rec->backrefs, list) {
3714                         if (!backref->reachable)
3715                                 continue;
3716                         if (!backref->errors && rec->found_root_item)
3717                                 continue;
3718                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3719                                 " index %llu namelen %u name %s errors %x\n",
3720                                 (unsigned long long)backref->ref_root,
3721                                 (unsigned long long)backref->dir,
3722                                 (unsigned long long)backref->index,
3723                                 backref->namelen, backref->name,
3724                                 backref->errors);
3725                         print_ref_error(backref->errors);
3726                 }
3727         }
3728         return errors > 0 ? 1 : 0;
3729 }
3730
3731 static int process_root_ref(struct extent_buffer *eb, int slot,
3732                             struct btrfs_key *key,
3733                             struct cache_tree *root_cache)
3734 {
3735         u64 dirid;
3736         u64 index;
3737         u32 len;
3738         u32 name_len;
3739         struct btrfs_root_ref *ref;
3740         char namebuf[BTRFS_NAME_LEN];
3741         int error;
3742
3743         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3744
3745         dirid = btrfs_root_ref_dirid(eb, ref);
3746         index = btrfs_root_ref_sequence(eb, ref);
3747         name_len = btrfs_root_ref_name_len(eb, ref);
3748
3749         if (name_len <= BTRFS_NAME_LEN) {
3750                 len = name_len;
3751                 error = 0;
3752         } else {
3753                 len = BTRFS_NAME_LEN;
3754                 error = REF_ERR_NAME_TOO_LONG;
3755         }
3756         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3757
3758         if (key->type == BTRFS_ROOT_REF_KEY) {
3759                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3760                                  index, namebuf, len, key->type, error);
3761         } else {
3762                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3763                                  index, namebuf, len, key->type, error);
3764         }
3765         return 0;
3766 }
3767
3768 static void free_corrupt_block(struct cache_extent *cache)
3769 {
3770         struct btrfs_corrupt_block *corrupt;
3771
3772         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3773         free(corrupt);
3774 }
3775
3776 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3777
3778 /*
3779  * Repair the btree of the given root.
3780  *
3781  * The fix is to remove the node key in corrupt_blocks cache_tree.
3782  * and rebalance the tree.
3783  * After the fix, the btree should be writeable.
3784  */
3785 static int repair_btree(struct btrfs_root *root,
3786                         struct cache_tree *corrupt_blocks)
3787 {
3788         struct btrfs_trans_handle *trans;
3789         struct btrfs_path path;
3790         struct btrfs_corrupt_block *corrupt;
3791         struct cache_extent *cache;
3792         struct btrfs_key key;
3793         u64 offset;
3794         int level;
3795         int ret = 0;
3796
3797         if (cache_tree_empty(corrupt_blocks))
3798                 return 0;
3799
3800         trans = btrfs_start_transaction(root, 1);
3801         if (IS_ERR(trans)) {
3802                 ret = PTR_ERR(trans);
3803                 fprintf(stderr, "Error starting transaction: %s\n",
3804                         strerror(-ret));
3805                 return ret;
3806         }
3807         btrfs_init_path(&path);
3808         cache = first_cache_extent(corrupt_blocks);
3809         while (cache) {
3810                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3811                                        cache);
3812                 level = corrupt->level;
3813                 path.lowest_level = level;
3814                 key.objectid = corrupt->key.objectid;
3815                 key.type = corrupt->key.type;
3816                 key.offset = corrupt->key.offset;
3817
3818                 /*
3819                  * Here we don't want to do any tree balance, since it may
3820                  * cause a balance with corrupted brother leaf/node,
3821                  * so ins_len set to 0 here.
3822                  * Balance will be done after all corrupt node/leaf is deleted.
3823                  */
3824                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3825                 if (ret < 0)
3826                         goto out;
3827                 offset = btrfs_node_blockptr(path.nodes[level],
3828                                              path.slots[level]);
3829
3830                 /* Remove the ptr */
3831                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3832                 if (ret < 0)
3833                         goto out;
3834                 /*
3835                  * Remove the corresponding extent
3836                  * return value is not concerned.
3837                  */
3838                 btrfs_release_path(&path);
3839                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3840                                         0, root->root_key.objectid,
3841                                         level - 1, 0);
3842                 cache = next_cache_extent(cache);
3843         }
3844
3845         /* Balance the btree using btrfs_search_slot() */
3846         cache = first_cache_extent(corrupt_blocks);
3847         while (cache) {
3848                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3849                                        cache);
3850                 memcpy(&key, &corrupt->key, sizeof(key));
3851                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3852                 if (ret < 0)
3853                         goto out;
3854                 /* return will always >0 since it won't find the item */
3855                 ret = 0;
3856                 btrfs_release_path(&path);
3857                 cache = next_cache_extent(cache);
3858         }
3859 out:
3860         btrfs_commit_transaction(trans, root);
3861         btrfs_release_path(&path);
3862         return ret;
3863 }
3864
3865 static int check_fs_root(struct btrfs_root *root,
3866                          struct cache_tree *root_cache,
3867                          struct walk_control *wc)
3868 {
3869         int ret = 0;
3870         int err = 0;
3871         int wret;
3872         int level;
3873         struct btrfs_path path;
3874         struct shared_node root_node;
3875         struct root_record *rec;
3876         struct btrfs_root_item *root_item = &root->root_item;
3877         struct cache_tree corrupt_blocks;
3878         struct orphan_data_extent *orphan;
3879         struct orphan_data_extent *tmp;
3880         enum btrfs_tree_block_status status;
3881         struct node_refs nrefs;
3882
3883         /*
3884          * Reuse the corrupt_block cache tree to record corrupted tree block
3885          *
3886          * Unlike the usage in extent tree check, here we do it in a per
3887          * fs/subvol tree base.
3888          */
3889         cache_tree_init(&corrupt_blocks);
3890         root->fs_info->corrupt_blocks = &corrupt_blocks;
3891
3892         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3893                 rec = get_root_rec(root_cache, root->root_key.objectid);
3894                 BUG_ON(IS_ERR(rec));
3895                 if (btrfs_root_refs(root_item) > 0)
3896                         rec->found_root_item = 1;
3897         }
3898
3899         btrfs_init_path(&path);
3900         memset(&root_node, 0, sizeof(root_node));
3901         cache_tree_init(&root_node.root_cache);
3902         cache_tree_init(&root_node.inode_cache);
3903         memset(&nrefs, 0, sizeof(nrefs));
3904
3905         /* Move the orphan extent record to corresponding inode_record */
3906         list_for_each_entry_safe(orphan, tmp,
3907                                  &root->orphan_data_extents, list) {
3908                 struct inode_record *inode;
3909
3910                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3911                                       1);
3912                 BUG_ON(IS_ERR(inode));
3913                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3914                 list_move(&orphan->list, &inode->orphan_extents);
3915         }
3916
3917         level = btrfs_header_level(root->node);
3918         memset(wc->nodes, 0, sizeof(wc->nodes));
3919         wc->nodes[level] = &root_node;
3920         wc->active_node = level;
3921         wc->root_level = level;
3922
3923         /* We may not have checked the root block, lets do that now */
3924         if (btrfs_is_leaf(root->node))
3925                 status = btrfs_check_leaf(root, NULL, root->node);
3926         else
3927                 status = btrfs_check_node(root, NULL, root->node);
3928         if (status != BTRFS_TREE_BLOCK_CLEAN)
3929                 return -EIO;
3930
3931         if (btrfs_root_refs(root_item) > 0 ||
3932             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3933                 path.nodes[level] = root->node;
3934                 extent_buffer_get(root->node);
3935                 path.slots[level] = 0;
3936         } else {
3937                 struct btrfs_key key;
3938                 struct btrfs_disk_key found_key;
3939
3940                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3941                 level = root_item->drop_level;
3942                 path.lowest_level = level;
3943                 if (level > btrfs_header_level(root->node) ||
3944                     level >= BTRFS_MAX_LEVEL) {
3945                         error("ignoring invalid drop level: %u", level);
3946                         goto skip_walking;
3947                 }
3948                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3949                 if (wret < 0)
3950                         goto skip_walking;
3951                 btrfs_node_key(path.nodes[level], &found_key,
3952                                 path.slots[level]);
3953                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3954                                         sizeof(found_key)));
3955         }
3956
3957         while (1) {
3958                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963
3964                 wret = walk_up_tree(root, &path, wc, &level);
3965                 if (wret < 0)
3966                         ret = wret;
3967                 if (wret != 0)
3968                         break;
3969         }
3970 skip_walking:
3971         btrfs_release_path(&path);
3972
3973         if (!cache_tree_empty(&corrupt_blocks)) {
3974                 struct cache_extent *cache;
3975                 struct btrfs_corrupt_block *corrupt;
3976
3977                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3978                        root->root_key.objectid);
3979                 cache = first_cache_extent(&corrupt_blocks);
3980                 while (cache) {
3981                         corrupt = container_of(cache,
3982                                                struct btrfs_corrupt_block,
3983                                                cache);
3984                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3985                                cache->start, corrupt->level,
3986                                corrupt->key.objectid, corrupt->key.type,
3987                                corrupt->key.offset);
3988                         cache = next_cache_extent(cache);
3989                 }
3990                 if (repair) {
3991                         printf("Try to repair the btree for root %llu\n",
3992                                root->root_key.objectid);
3993                         ret = repair_btree(root, &corrupt_blocks);
3994                         if (ret < 0)
3995                                 fprintf(stderr, "Failed to repair btree: %s\n",
3996                                         strerror(-ret));
3997                         if (!ret)
3998                                 printf("Btree for root %llu is fixed\n",
3999                                        root->root_key.objectid);
4000                 }
4001         }
4002
4003         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4004         if (err < 0)
4005                 ret = err;
4006
4007         if (root_node.current) {
4008                 root_node.current->checked = 1;
4009                 maybe_free_inode_rec(&root_node.inode_cache,
4010                                 root_node.current);
4011         }
4012
4013         err = check_inode_recs(root, &root_node.inode_cache);
4014         if (!ret)
4015                 ret = err;
4016
4017         free_corrupt_blocks_tree(&corrupt_blocks);
4018         root->fs_info->corrupt_blocks = NULL;
4019         free_orphan_data_extents(&root->orphan_data_extents);
4020         return ret;
4021 }
4022
4023 static int fs_root_objectid(u64 objectid)
4024 {
4025         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4026             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4027                 return 1;
4028         return is_fstree(objectid);
4029 }
4030
4031 static int check_fs_roots(struct btrfs_root *root,
4032                           struct cache_tree *root_cache)
4033 {
4034         struct btrfs_path path;
4035         struct btrfs_key key;
4036         struct walk_control wc;
4037         struct extent_buffer *leaf, *tree_node;
4038         struct btrfs_root *tmp_root;
4039         struct btrfs_root *tree_root = root->fs_info->tree_root;
4040         int ret;
4041         int err = 0;
4042
4043         if (ctx.progress_enabled) {
4044                 ctx.tp = TASK_FS_ROOTS;
4045                 task_start(ctx.info);
4046         }
4047
4048         /*
4049          * Just in case we made any changes to the extent tree that weren't
4050          * reflected into the free space cache yet.
4051          */
4052         if (repair)
4053                 reset_cached_block_groups(root->fs_info);
4054         memset(&wc, 0, sizeof(wc));
4055         cache_tree_init(&wc.shared);
4056         btrfs_init_path(&path);
4057
4058 again:
4059         key.offset = 0;
4060         key.objectid = 0;
4061         key.type = BTRFS_ROOT_ITEM_KEY;
4062         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4063         if (ret < 0) {
4064                 err = 1;
4065                 goto out;
4066         }
4067         tree_node = tree_root->node;
4068         while (1) {
4069                 if (tree_node != tree_root->node) {
4070                         free_root_recs_tree(root_cache);
4071                         btrfs_release_path(&path);
4072                         goto again;
4073                 }
4074                 leaf = path.nodes[0];
4075                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4076                         ret = btrfs_next_leaf(tree_root, &path);
4077                         if (ret) {
4078                                 if (ret < 0)
4079                                         err = 1;
4080                                 break;
4081                         }
4082                         leaf = path.nodes[0];
4083                 }
4084                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4085                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4086                     fs_root_objectid(key.objectid)) {
4087                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4088                                 tmp_root = btrfs_read_fs_root_no_cache(
4089                                                 root->fs_info, &key);
4090                         } else {
4091                                 key.offset = (u64)-1;
4092                                 tmp_root = btrfs_read_fs_root(
4093                                                 root->fs_info, &key);
4094                         }
4095                         if (IS_ERR(tmp_root)) {
4096                                 err = 1;
4097                                 goto next;
4098                         }
4099                         ret = check_fs_root(tmp_root, root_cache, &wc);
4100                         if (ret == -EAGAIN) {
4101                                 free_root_recs_tree(root_cache);
4102                                 btrfs_release_path(&path);
4103                                 goto again;
4104                         }
4105                         if (ret)
4106                                 err = 1;
4107                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4108                                 btrfs_free_fs_root(tmp_root);
4109                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4110                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4111                         process_root_ref(leaf, path.slots[0], &key,
4112                                          root_cache);
4113                 }
4114 next:
4115                 path.slots[0]++;
4116         }
4117 out:
4118         btrfs_release_path(&path);
4119         if (err)
4120                 free_extent_cache_tree(&wc.shared);
4121         if (!cache_tree_empty(&wc.shared))
4122                 fprintf(stderr, "warning line %d\n", __LINE__);
4123
4124         task_stop(ctx.info);
4125
4126         return err;
4127 }
4128
4129 /*
4130  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4131  * INODE_REF/INODE_EXTREF match.
4132  *
4133  * @root:       the root of the fs/file tree
4134  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4135  * @key:        the key of the DIR_ITEM/DIR_INDEX
4136  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4137  *              distinguish root_dir between normal dir/file
4138  * @name:       the name in the INODE_REF/INODE_EXTREF
4139  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4140  * @mode:       the st_mode of INODE_ITEM
4141  *
4142  * Return 0 if no error occurred.
4143  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4144  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4145  * dir/file.
4146  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4147  * not match for normal dir/file.
4148  */
4149 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4150                          struct btrfs_key *key, u64 index, char *name,
4151                          u32 namelen, u32 mode)
4152 {
4153         struct btrfs_path path;
4154         struct extent_buffer *node;
4155         struct btrfs_dir_item *di;
4156         struct btrfs_key location;
4157         char namebuf[BTRFS_NAME_LEN] = {0};
4158         u32 total;
4159         u32 cur = 0;
4160         u32 len;
4161         u32 name_len;
4162         u32 data_len;
4163         u8 filetype;
4164         int slot;
4165         int ret;
4166
4167         btrfs_init_path(&path);
4168         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4169         if (ret < 0) {
4170                 ret = DIR_ITEM_MISSING;
4171                 goto out;
4172         }
4173
4174         /* Process root dir and goto out*/
4175         if (index == 0) {
4176                 if (ret == 0) {
4177                         ret = ROOT_DIR_ERROR;
4178                         error(
4179                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4180                                 root->objectid,
4181                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4182                                         "REF" : "EXTREF",
4183                                 ref_key->objectid, ref_key->offset,
4184                                 key->type == BTRFS_DIR_ITEM_KEY ?
4185                                         "DIR_ITEM" : "DIR_INDEX");
4186                 } else {
4187                         ret = 0;
4188                 }
4189
4190                 goto out;
4191         }
4192
4193         /* Process normal file/dir */
4194         if (ret > 0) {
4195                 ret = DIR_ITEM_MISSING;
4196                 error(
4197                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4198                         root->objectid,
4199                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4200                         ref_key->objectid, ref_key->offset,
4201                         key->type == BTRFS_DIR_ITEM_KEY ?
4202                                 "DIR_ITEM" : "DIR_INDEX",
4203                         key->objectid, key->offset, namelen, name,
4204                         imode_to_type(mode));
4205                 goto out;
4206         }
4207
4208         /* Check whether inode_id/filetype/name match */
4209         node = path.nodes[0];
4210         slot = path.slots[0];
4211         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4212         total = btrfs_item_size_nr(node, slot);
4213         while (cur < total) {
4214                 ret = DIR_ITEM_MISMATCH;
4215                 name_len = btrfs_dir_name_len(node, di);
4216                 data_len = btrfs_dir_data_len(node, di);
4217
4218                 btrfs_dir_item_key_to_cpu(node, di, &location);
4219                 if (location.objectid != ref_key->objectid ||
4220                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4221                     location.offset != 0)
4222                         goto next;
4223
4224                 filetype = btrfs_dir_type(node, di);
4225                 if (imode_to_type(mode) != filetype)
4226                         goto next;
4227
4228                 if (name_len <= BTRFS_NAME_LEN) {
4229                         len = name_len;
4230                 } else {
4231                         len = BTRFS_NAME_LEN;
4232                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4233                         root->objectid,
4234                         key->type == BTRFS_DIR_ITEM_KEY ?
4235                         "DIR_ITEM" : "DIR_INDEX",
4236                         key->objectid, key->offset, name_len);
4237                 }
4238                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4239                 if (len != namelen || strncmp(namebuf, name, len))
4240                         goto next;
4241
4242                 ret = 0;
4243                 goto out;
4244 next:
4245                 len = sizeof(*di) + name_len + data_len;
4246                 di = (struct btrfs_dir_item *)((char *)di + len);
4247                 cur += len;
4248         }
4249         if (ret == DIR_ITEM_MISMATCH)
4250                 error(
4251                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4252                         root->objectid,
4253                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4254                         ref_key->objectid, ref_key->offset,
4255                         key->type == BTRFS_DIR_ITEM_KEY ?
4256                                 "DIR_ITEM" : "DIR_INDEX",
4257                         key->objectid, key->offset, namelen, name,
4258                         imode_to_type(mode));
4259 out:
4260         btrfs_release_path(&path);
4261         return ret;
4262 }
4263
4264 /*
4265  * Traverse the given INODE_REF and call find_dir_item() to find related
4266  * DIR_ITEM/DIR_INDEX.
4267  *
4268  * @root:       the root of the fs/file tree
4269  * @ref_key:    the key of the INODE_REF
4270  * @refs:       the count of INODE_REF
4271  * @mode:       the st_mode of INODE_ITEM
4272  *
4273  * Return 0 if no error occurred.
4274  */
4275 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4276                            struct extent_buffer *node, int slot, u64 *refs,
4277                            int mode)
4278 {
4279         struct btrfs_key key;
4280         struct btrfs_inode_ref *ref;
4281         char namebuf[BTRFS_NAME_LEN] = {0};
4282         u32 total;
4283         u32 cur = 0;
4284         u32 len;
4285         u32 name_len;
4286         u64 index;
4287         int ret, err = 0;
4288
4289         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4290         total = btrfs_item_size_nr(node, slot);
4291
4292 next:
4293         /* Update inode ref count */
4294         (*refs)++;
4295
4296         index = btrfs_inode_ref_index(node, ref);
4297         name_len = btrfs_inode_ref_name_len(node, ref);
4298         if (name_len <= BTRFS_NAME_LEN) {
4299                 len = name_len;
4300         } else {
4301                 len = BTRFS_NAME_LEN;
4302                 warning("root %llu INODE_REF[%llu %llu] name too long",
4303                         root->objectid, ref_key->objectid, ref_key->offset);
4304         }
4305
4306         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4307
4308         /* Check root dir ref name */
4309         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4310                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4311                       root->objectid, ref_key->objectid, ref_key->offset,
4312                       namebuf);
4313                 err |= ROOT_DIR_ERROR;
4314         }
4315
4316         /* Find related DIR_INDEX */
4317         key.objectid = ref_key->offset;
4318         key.type = BTRFS_DIR_INDEX_KEY;
4319         key.offset = index;
4320         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4321         err |= ret;
4322
4323         /* Find related dir_item */
4324         key.objectid = ref_key->offset;
4325         key.type = BTRFS_DIR_ITEM_KEY;
4326         key.offset = btrfs_name_hash(namebuf, len);
4327         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4328         err |= ret;
4329
4330         len = sizeof(*ref) + name_len;
4331         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4332         cur += len;
4333         if (cur < total)
4334                 goto next;
4335
4336         return err;
4337 }
4338
4339 /*
4340  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4341  * DIR_ITEM/DIR_INDEX.
4342  *
4343  * @root:       the root of the fs/file tree
4344  * @ref_key:    the key of the INODE_EXTREF
4345  * @refs:       the count of INODE_EXTREF
4346  * @mode:       the st_mode of INODE_ITEM
4347  *
4348  * Return 0 if no error occurred.
4349  */
4350 static int check_inode_extref(struct btrfs_root *root,
4351                               struct btrfs_key *ref_key,
4352                               struct extent_buffer *node, int slot, u64 *refs,
4353                               int mode)
4354 {
4355         struct btrfs_key key;
4356         struct btrfs_inode_extref *extref;
4357         char namebuf[BTRFS_NAME_LEN] = {0};
4358         u32 total;
4359         u32 cur = 0;
4360         u32 len;
4361         u32 name_len;
4362         u64 index;
4363         u64 parent;
4364         int ret;
4365         int err = 0;
4366
4367         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4368         total = btrfs_item_size_nr(node, slot);
4369
4370 next:
4371         /* update inode ref count */
4372         (*refs)++;
4373         name_len = btrfs_inode_extref_name_len(node, extref);
4374         index = btrfs_inode_extref_index(node, extref);
4375         parent = btrfs_inode_extref_parent(node, extref);
4376         if (name_len <= BTRFS_NAME_LEN) {
4377                 len = name_len;
4378         } else {
4379                 len = BTRFS_NAME_LEN;
4380                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4381                         root->objectid, ref_key->objectid, ref_key->offset);
4382         }
4383         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4384
4385         /* Check root dir ref name */
4386         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4387                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4388                       root->objectid, ref_key->objectid, ref_key->offset,
4389                       namebuf);
4390                 err |= ROOT_DIR_ERROR;
4391         }
4392
4393         /* find related dir_index */
4394         key.objectid = parent;
4395         key.type = BTRFS_DIR_INDEX_KEY;
4396         key.offset = index;
4397         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4398         err |= ret;
4399
4400         /* find related dir_item */
4401         key.objectid = parent;
4402         key.type = BTRFS_DIR_ITEM_KEY;
4403         key.offset = btrfs_name_hash(namebuf, len);
4404         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4405         err |= ret;
4406
4407         len = sizeof(*extref) + name_len;
4408         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4409         cur += len;
4410
4411         if (cur < total)
4412                 goto next;
4413
4414         return err;
4415 }
4416
4417 /*
4418  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4419  * DIR_ITEM/DIR_INDEX match.
4420  *
4421  * @root:       the root of the fs/file tree
4422  * @key:        the key of the INODE_REF/INODE_EXTREF
4423  * @name:       the name in the INODE_REF/INODE_EXTREF
4424  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4425  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4426  * to (u64)-1
4427  * @ext_ref:    the EXTENDED_IREF feature
4428  *
4429  * Return 0 if no error occurred.
4430  * Return >0 for error bitmap
4431  */
4432 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4433                           char *name, int namelen, u64 index,
4434                           unsigned int ext_ref)
4435 {
4436         struct btrfs_path path;
4437         struct btrfs_inode_ref *ref;
4438         struct btrfs_inode_extref *extref;
4439         struct extent_buffer *node;
4440         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4441         u32 total;
4442         u32 cur = 0;
4443         u32 len;
4444         u32 ref_namelen;
4445         u64 ref_index;
4446         u64 parent;
4447         u64 dir_id;
4448         int slot;
4449         int ret;
4450
4451         btrfs_init_path(&path);
4452         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4453         if (ret) {
4454                 ret = INODE_REF_MISSING;
4455                 goto extref;
4456         }
4457
4458         node = path.nodes[0];
4459         slot = path.slots[0];
4460
4461         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4462         total = btrfs_item_size_nr(node, slot);
4463
4464         /* Iterate all entry of INODE_REF */
4465         while (cur < total) {
4466                 ret = INODE_REF_MISSING;
4467
4468                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4469                 ref_index = btrfs_inode_ref_index(node, ref);
4470                 if (index != (u64)-1 && index != ref_index)
4471                         goto next_ref;
4472
4473                 if (ref_namelen <= BTRFS_NAME_LEN) {
4474                         len = ref_namelen;
4475                 } else {
4476                         len = BTRFS_NAME_LEN;
4477                         warning("root %llu INODE %s[%llu %llu] name too long",
4478                                 root->objectid,
4479                                 key->type == BTRFS_INODE_REF_KEY ?
4480                                         "REF" : "EXTREF",
4481                                 key->objectid, key->offset);
4482                 }
4483                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4484                                    len);
4485
4486                 if (len != namelen || strncmp(ref_namebuf, name, len))
4487                         goto next_ref;
4488
4489                 ret = 0;
4490                 goto out;
4491 next_ref:
4492                 len = sizeof(*ref) + ref_namelen;
4493                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4494                 cur += len;
4495         }
4496
4497 extref:
4498         /* Skip if not support EXTENDED_IREF feature */
4499         if (!ext_ref)
4500                 goto out;
4501
4502         btrfs_release_path(&path);
4503         btrfs_init_path(&path);
4504
4505         dir_id = key->offset;
4506         key->type = BTRFS_INODE_EXTREF_KEY;
4507         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4508
4509         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4510         if (ret) {
4511                 ret = INODE_REF_MISSING;
4512                 goto out;
4513         }
4514
4515         node = path.nodes[0];
4516         slot = path.slots[0];
4517
4518         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4519         cur = 0;
4520         total = btrfs_item_size_nr(node, slot);
4521
4522         /* Iterate all entry of INODE_EXTREF */
4523         while (cur < total) {
4524                 ret = INODE_REF_MISSING;
4525
4526                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4527                 ref_index = btrfs_inode_extref_index(node, extref);
4528                 parent = btrfs_inode_extref_parent(node, extref);
4529                 if (index != (u64)-1 && index != ref_index)
4530                         goto next_extref;
4531
4532                 if (parent != dir_id)
4533                         goto next_extref;
4534
4535                 if (ref_namelen <= BTRFS_NAME_LEN) {
4536                         len = ref_namelen;
4537                 } else {
4538                         len = BTRFS_NAME_LEN;
4539                         warning("root %llu INODE %s[%llu %llu] name too long",
4540                                 root->objectid,
4541                                 key->type == BTRFS_INODE_REF_KEY ?
4542                                         "REF" : "EXTREF",
4543                                 key->objectid, key->offset);
4544                 }
4545                 read_extent_buffer(node, ref_namebuf,
4546                                    (unsigned long)(extref + 1), len);
4547
4548                 if (len != namelen || strncmp(ref_namebuf, name, len))
4549                         goto next_extref;
4550
4551                 ret = 0;
4552                 goto out;
4553
4554 next_extref:
4555                 len = sizeof(*extref) + ref_namelen;
4556                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4557                 cur += len;
4558
4559         }
4560 out:
4561         btrfs_release_path(&path);
4562         return ret;
4563 }
4564
4565 /*
4566  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4567  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4568  *
4569  * @root:       the root of the fs/file tree
4570  * @key:        the key of the DIR_ITEM/DIR_INDEX
4571  * @size:       the st_size of the INODE_ITEM
4572  * @ext_ref:    the EXTENDED_IREF feature
4573  *
4574  * Return 0 if no error occurred.
4575  */
4576 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4577                           struct extent_buffer *node, int slot, u64 *size,
4578                           unsigned int ext_ref)
4579 {
4580         struct btrfs_dir_item *di;
4581         struct btrfs_inode_item *ii;
4582         struct btrfs_path path;
4583         struct btrfs_key location;
4584         char namebuf[BTRFS_NAME_LEN] = {0};
4585         u32 total;
4586         u32 cur = 0;
4587         u32 len;
4588         u32 name_len;
4589         u32 data_len;
4590         u8 filetype;
4591         u32 mode;
4592         u64 index;
4593         int ret;
4594         int err = 0;
4595
4596         /*
4597          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4598          * ignore index check.
4599          */
4600         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4601
4602         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4603         total = btrfs_item_size_nr(node, slot);
4604
4605         while (cur < total) {
4606                 data_len = btrfs_dir_data_len(node, di);
4607                 if (data_len)
4608                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4609                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4610                               "DIR_ITEM" : "DIR_INDEX",
4611                               key->objectid, key->offset, data_len);
4612
4613                 name_len = btrfs_dir_name_len(node, di);
4614                 if (name_len <= BTRFS_NAME_LEN) {
4615                         len = name_len;
4616                 } else {
4617                         len = BTRFS_NAME_LEN;
4618                         warning("root %llu %s[%llu %llu] name too long",
4619                                 root->objectid,
4620                                 key->type == BTRFS_DIR_ITEM_KEY ?
4621                                 "DIR_ITEM" : "DIR_INDEX",
4622                                 key->objectid, key->offset);
4623                 }
4624                 (*size) += name_len;
4625
4626                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4627                 filetype = btrfs_dir_type(node, di);
4628
4629                 btrfs_init_path(&path);
4630                 btrfs_dir_item_key_to_cpu(node, di, &location);
4631
4632                 /* Ignore related ROOT_ITEM check */
4633                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4634                         goto next;
4635
4636                 /* Check relative INODE_ITEM(existence/filetype) */
4637                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4638                 if (ret) {
4639                         err |= INODE_ITEM_MISSING;
4640                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4641                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4642                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4643                               key->offset, location.objectid, name_len,
4644                               namebuf, filetype);
4645                         goto next;
4646                 }
4647
4648                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4649                                     struct btrfs_inode_item);
4650                 mode = btrfs_inode_mode(path.nodes[0], ii);
4651
4652                 if (imode_to_type(mode) != filetype) {
4653                         err |= INODE_ITEM_MISMATCH;
4654                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4655                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4656                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4657                               key->offset, name_len, namebuf, filetype);
4658                 }
4659
4660                 /* Check relative INODE_REF/INODE_EXTREF */
4661                 location.type = BTRFS_INODE_REF_KEY;
4662                 location.offset = key->objectid;
4663                 ret = find_inode_ref(root, &location, namebuf, len,
4664                                        index, ext_ref);
4665                 err |= ret;
4666                 if (ret & INODE_REF_MISSING)
4667                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4668                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4669                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4670                               key->offset, name_len, namebuf, filetype);
4671
4672 next:
4673                 btrfs_release_path(&path);
4674                 len = sizeof(*di) + name_len + data_len;
4675                 di = (struct btrfs_dir_item *)((char *)di + len);
4676                 cur += len;
4677
4678                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4679                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4680                               root->objectid, key->objectid, key->offset);
4681                         break;
4682                 }
4683         }
4684
4685         return err;
4686 }
4687
4688 /*
4689  * Check file extent datasum/hole, update the size of the file extents,
4690  * check and update the last offset of the file extent.
4691  *
4692  * @root:       the root of fs/file tree.
4693  * @fkey:       the key of the file extent.
4694  * @nodatasum:  INODE_NODATASUM feature.
4695  * @size:       the sum of all EXTENT_DATA items size for this inode.
4696  * @end:        the offset of the last extent.
4697  *
4698  * Return 0 if no error occurred.
4699  */
4700 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4701                              struct extent_buffer *node, int slot,
4702                              unsigned int nodatasum, u64 *size, u64 *end)
4703 {
4704         struct btrfs_file_extent_item *fi;
4705         u64 disk_bytenr;
4706         u64 disk_num_bytes;
4707         u64 extent_num_bytes;
4708         u64 extent_offset;
4709         u64 csum_found;         /* In byte size, sectorsize aligned */
4710         u64 search_start;       /* Logical range start we search for csum */
4711         u64 search_len;         /* Logical range len we search for csum */
4712         unsigned int extent_type;
4713         unsigned int is_hole;
4714         int compressed = 0;
4715         int ret;
4716         int err = 0;
4717
4718         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4719
4720         /* Check inline extent */
4721         extent_type = btrfs_file_extent_type(node, fi);
4722         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4723                 struct btrfs_item *e = btrfs_item_nr(slot);
4724                 u32 item_inline_len;
4725
4726                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4727                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4728                 compressed = btrfs_file_extent_compression(node, fi);
4729                 if (extent_num_bytes == 0) {
4730                         error(
4731                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4732                                 root->objectid, fkey->objectid, fkey->offset);
4733                         err |= FILE_EXTENT_ERROR;
4734                 }
4735                 if (!compressed && extent_num_bytes != item_inline_len) {
4736                         error(
4737                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4738                                 root->objectid, fkey->objectid, fkey->offset,
4739                                 extent_num_bytes, item_inline_len);
4740                         err |= FILE_EXTENT_ERROR;
4741                 }
4742                 *size += extent_num_bytes;
4743                 return err;
4744         }
4745
4746         /* Check extent type */
4747         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4748                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4749                 err |= FILE_EXTENT_ERROR;
4750                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752                 return err;
4753         }
4754
4755         /* Check REG_EXTENT/PREALLOC_EXTENT */
4756         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4757         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4758         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4759         extent_offset = btrfs_file_extent_offset(node, fi);
4760         compressed = btrfs_file_extent_compression(node, fi);
4761         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4762
4763         /*
4764          * Check EXTENT_DATA csum
4765          *
4766          * For plain (uncompressed) extent, we should only check the range
4767          * we're referring to, as it's possible that part of prealloc extent
4768          * has been written, and has csum:
4769          *
4770          * |<--- Original large preallocated extent A ---->|
4771          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4772          *      No csum                         Has csum
4773          *
4774          * For compressed extent, we should check the whole range.
4775          */
4776         if (!compressed) {
4777                 search_start = disk_bytenr + extent_offset;
4778                 search_len = extent_num_bytes;
4779         } else {
4780                 search_start = disk_bytenr;
4781                 search_len = disk_num_bytes;
4782         }
4783         ret = count_csum_range(root, search_start, search_len, &csum_found);
4784         if (csum_found > 0 && nodatasum) {
4785                 err |= ODD_CSUM_ITEM;
4786                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4787                       root->objectid, fkey->objectid, fkey->offset);
4788         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4789                    !is_hole && (ret < 0 || csum_found < search_len)) {
4790                 err |= CSUM_ITEM_MISSING;
4791                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4792                       root->objectid, fkey->objectid, fkey->offset,
4793                       csum_found, search_len);
4794         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4795                 err |= ODD_CSUM_ITEM;
4796                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4797                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4798         }
4799
4800         /* Check EXTENT_DATA hole */
4801         if (no_holes && is_hole) {
4802                 err |= FILE_EXTENT_ERROR;
4803                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4804                       root->objectid, fkey->objectid, fkey->offset);
4805         } else if (!no_holes && *end != fkey->offset) {
4806                 err |= FILE_EXTENT_ERROR;
4807                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4808                       root->objectid, fkey->objectid, fkey->offset);
4809         }
4810
4811         *end += extent_num_bytes;
4812         if (!is_hole)
4813                 *size += extent_num_bytes;
4814
4815         return err;
4816 }
4817
4818 /*
4819  * Check INODE_ITEM and related ITEMs (the same inode number)
4820  * 1. check link count
4821  * 2. check inode ref/extref
4822  * 3. check dir item/index
4823  *
4824  * @ext_ref:    the EXTENDED_IREF feature
4825  *
4826  * Return 0 if no error occurred.
4827  * Return >0 for error or hit the traversal is done(by error bitmap)
4828  */
4829 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4830                             unsigned int ext_ref)
4831 {
4832         struct extent_buffer *node;
4833         struct btrfs_inode_item *ii;
4834         struct btrfs_key key;
4835         u64 inode_id;
4836         u32 mode;
4837         u64 nlink;
4838         u64 nbytes;
4839         u64 isize;
4840         u64 size = 0;
4841         u64 refs = 0;
4842         u64 extent_end = 0;
4843         u64 extent_size = 0;
4844         unsigned int dir;
4845         unsigned int nodatasum;
4846         int slot;
4847         int ret;
4848         int err = 0;
4849
4850         node = path->nodes[0];
4851         slot = path->slots[0];
4852
4853         btrfs_item_key_to_cpu(node, &key, slot);
4854         inode_id = key.objectid;
4855
4856         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4857                 ret = btrfs_next_item(root, path);
4858                 if (ret > 0)
4859                         err |= LAST_ITEM;
4860                 return err;
4861         }
4862
4863         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4864         isize = btrfs_inode_size(node, ii);
4865         nbytes = btrfs_inode_nbytes(node, ii);
4866         mode = btrfs_inode_mode(node, ii);
4867         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4868         nlink = btrfs_inode_nlink(node, ii);
4869         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4870
4871         while (1) {
4872                 ret = btrfs_next_item(root, path);
4873                 if (ret < 0) {
4874                         /* out will fill 'err' rusing current statistics */
4875                         goto out;
4876                 } else if (ret > 0) {
4877                         err |= LAST_ITEM;
4878                         goto out;
4879                 }
4880
4881                 node = path->nodes[0];
4882                 slot = path->slots[0];
4883                 btrfs_item_key_to_cpu(node, &key, slot);
4884                 if (key.objectid != inode_id)
4885                         goto out;
4886
4887                 switch (key.type) {
4888                 case BTRFS_INODE_REF_KEY:
4889                         ret = check_inode_ref(root, &key, node, slot, &refs,
4890                                               mode);
4891                         err |= ret;
4892                         break;
4893                 case BTRFS_INODE_EXTREF_KEY:
4894                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4895                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4896                                         root->objectid, key.objectid,
4897                                         key.offset);
4898                         ret = check_inode_extref(root, &key, node, slot, &refs,
4899                                                  mode);
4900                         err |= ret;
4901                         break;
4902                 case BTRFS_DIR_ITEM_KEY:
4903                 case BTRFS_DIR_INDEX_KEY:
4904                         if (!dir) {
4905                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4906                                         root->objectid, inode_id,
4907                                         imode_to_type(mode), key.objectid,
4908                                         key.offset);
4909                         }
4910                         ret = check_dir_item(root, &key, node, slot, &size,
4911                                              ext_ref);
4912                         err |= ret;
4913                         break;
4914                 case BTRFS_EXTENT_DATA_KEY:
4915                         if (dir) {
4916                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4917                                         root->objectid, inode_id, key.objectid,
4918                                         key.offset);
4919                         }
4920                         ret = check_file_extent(root, &key, node, slot,
4921                                                 nodatasum, &extent_size,
4922                                                 &extent_end);
4923                         err |= ret;
4924                         break;
4925                 case BTRFS_XATTR_ITEM_KEY:
4926                         break;
4927                 default:
4928                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4929                               key.objectid, key.type, key.offset);
4930                 }
4931         }
4932
4933 out:
4934         /* verify INODE_ITEM nlink/isize/nbytes */
4935         if (dir) {
4936                 if (nlink != 1) {
4937                         err |= LINK_COUNT_ERROR;
4938                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4939                               root->objectid, inode_id, nlink);
4940                 }
4941
4942                 /*
4943                  * Just a warning, as dir inode nbytes is just an
4944                  * instructive value.
4945                  */
4946                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4947                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4948                                 root->objectid, inode_id, root->nodesize);
4949                 }
4950
4951                 if (isize != size) {
4952                         err |= ISIZE_ERROR;
4953                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4954                               root->objectid, inode_id, isize, size);
4955                 }
4956         } else {
4957                 if (nlink != refs) {
4958                         err |= LINK_COUNT_ERROR;
4959                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4960                               root->objectid, inode_id, nlink, refs);
4961                 } else if (!nlink) {
4962                         err |= ORPHAN_ITEM;
4963                 }
4964
4965                 if (!nbytes && !no_holes && extent_end < isize) {
4966                         err |= NBYTES_ERROR;
4967                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4968                               root->objectid, inode_id, isize);
4969                 }
4970
4971                 if (nbytes != extent_size) {
4972                         err |= NBYTES_ERROR;
4973                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4974                               root->objectid, inode_id, nbytes, extent_size);
4975                 }
4976         }
4977
4978         return err;
4979 }
4980
4981 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4982 {
4983         struct btrfs_path path;
4984         struct btrfs_key key;
4985         int err = 0;
4986         int ret;
4987
4988         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4989         key.type = BTRFS_INODE_ITEM_KEY;
4990         key.offset = 0;
4991
4992         /* For root being dropped, we don't need to check first inode */
4993         if (btrfs_root_refs(&root->root_item) == 0 &&
4994             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4995             key.objectid)
4996                 return 0;
4997
4998         btrfs_init_path(&path);
4999
5000         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5001         if (ret < 0)
5002                 goto out;
5003         if (ret > 0) {
5004                 ret = 0;
5005                 err |= INODE_ITEM_MISSING;
5006                 error("first inode item of root %llu is missing",
5007                       root->objectid);
5008         }
5009
5010         err |= check_inode_item(root, &path, ext_ref);
5011         err &= ~LAST_ITEM;
5012         if (err && !ret)
5013                 ret = -EIO;
5014 out:
5015         btrfs_release_path(&path);
5016         return ret;
5017 }
5018
5019 /*
5020  * Iterate all item on the tree and call check_inode_item() to check.
5021  *
5022  * @root:       the root of the tree to be checked.
5023  * @ext_ref:    the EXTENDED_IREF feature
5024  *
5025  * Return 0 if no error found.
5026  * Return <0 for error.
5027  */
5028 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5029 {
5030         struct btrfs_path path;
5031         struct node_refs nrefs;
5032         struct btrfs_root_item *root_item = &root->root_item;
5033         int ret;
5034         int level;
5035         int err = 0;
5036
5037         /*
5038          * We need to manually check the first inode item(256)
5039          * As the following traversal function will only start from
5040          * the first inode item in the leaf, if inode item(256) is missing
5041          * we will just skip it forever.
5042          */
5043         ret = check_fs_first_inode(root, ext_ref);
5044         if (ret < 0)
5045                 return ret;
5046
5047         memset(&nrefs, 0, sizeof(nrefs));
5048         level = btrfs_header_level(root->node);
5049         btrfs_init_path(&path);
5050
5051         if (btrfs_root_refs(root_item) > 0 ||
5052             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5053                 path.nodes[level] = root->node;
5054                 path.slots[level] = 0;
5055                 extent_buffer_get(root->node);
5056         } else {
5057                 struct btrfs_key key;
5058
5059                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5060                 level = root_item->drop_level;
5061                 path.lowest_level = level;
5062                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5063                 if (ret < 0)
5064                         goto out;
5065                 ret = 0;
5066         }
5067
5068         while (1) {
5069                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5070                 err |= !!ret;
5071
5072                 /* if ret is negative, walk shall stop */
5073                 if (ret < 0) {
5074                         ret = err;
5075                         break;
5076                 }
5077
5078                 ret = walk_up_tree_v2(root, &path, &level);
5079                 if (ret != 0) {
5080                         /* Normal exit, reset ret to err */
5081                         ret = err;
5082                         break;
5083                 }
5084         }
5085
5086 out:
5087         btrfs_release_path(&path);
5088         return ret;
5089 }
5090
5091 /*
5092  * Find the relative ref for root_ref and root_backref.
5093  *
5094  * @root:       the root of the root tree.
5095  * @ref_key:    the key of the root ref.
5096  *
5097  * Return 0 if no error occurred.
5098  */
5099 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5100                           struct extent_buffer *node, int slot)
5101 {
5102         struct btrfs_path path;
5103         struct btrfs_key key;
5104         struct btrfs_root_ref *ref;
5105         struct btrfs_root_ref *backref;
5106         char ref_name[BTRFS_NAME_LEN] = {0};
5107         char backref_name[BTRFS_NAME_LEN] = {0};
5108         u64 ref_dirid;
5109         u64 ref_seq;
5110         u32 ref_namelen;
5111         u64 backref_dirid;
5112         u64 backref_seq;
5113         u32 backref_namelen;
5114         u32 len;
5115         int ret;
5116         int err = 0;
5117
5118         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5119         ref_dirid = btrfs_root_ref_dirid(node, ref);
5120         ref_seq = btrfs_root_ref_sequence(node, ref);
5121         ref_namelen = btrfs_root_ref_name_len(node, ref);
5122
5123         if (ref_namelen <= BTRFS_NAME_LEN) {
5124                 len = ref_namelen;
5125         } else {
5126                 len = BTRFS_NAME_LEN;
5127                 warning("%s[%llu %llu] ref_name too long",
5128                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5129                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5130                         ref_key->offset);
5131         }
5132         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5133
5134         /* Find relative root_ref */
5135         key.objectid = ref_key->offset;
5136         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5137         key.offset = ref_key->objectid;
5138
5139         btrfs_init_path(&path);
5140         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5141         if (ret) {
5142                 err |= ROOT_REF_MISSING;
5143                 error("%s[%llu %llu] couldn't find relative ref",
5144                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5145                       "ROOT_REF" : "ROOT_BACKREF",
5146                       ref_key->objectid, ref_key->offset);
5147                 goto out;
5148         }
5149
5150         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5151                                  struct btrfs_root_ref);
5152         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5153         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5154         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5155
5156         if (backref_namelen <= BTRFS_NAME_LEN) {
5157                 len = backref_namelen;
5158         } else {
5159                 len = BTRFS_NAME_LEN;
5160                 warning("%s[%llu %llu] ref_name too long",
5161                         key.type == BTRFS_ROOT_REF_KEY ?
5162                         "ROOT_REF" : "ROOT_BACKREF",
5163                         key.objectid, key.offset);
5164         }
5165         read_extent_buffer(path.nodes[0], backref_name,
5166                            (unsigned long)(backref + 1), len);
5167
5168         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5169             ref_namelen != backref_namelen ||
5170             strncmp(ref_name, backref_name, len)) {
5171                 err |= ROOT_REF_MISMATCH;
5172                 error("%s[%llu %llu] mismatch relative ref",
5173                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5174                       "ROOT_REF" : "ROOT_BACKREF",
5175                       ref_key->objectid, ref_key->offset);
5176         }
5177 out:
5178         btrfs_release_path(&path);
5179         return err;
5180 }
5181
5182 /*
5183  * Check all fs/file tree in low_memory mode.
5184  *
5185  * 1. for fs tree root item, call check_fs_root_v2()
5186  * 2. for fs tree root ref/backref, call check_root_ref()
5187  *
5188  * Return 0 if no error occurred.
5189  */
5190 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5191 {
5192         struct btrfs_root *tree_root = fs_info->tree_root;
5193         struct btrfs_root *cur_root = NULL;
5194         struct btrfs_path path;
5195         struct btrfs_key key;
5196         struct extent_buffer *node;
5197         unsigned int ext_ref;
5198         int slot;
5199         int ret;
5200         int err = 0;
5201
5202         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5203
5204         btrfs_init_path(&path);
5205         key.objectid = BTRFS_FS_TREE_OBJECTID;
5206         key.offset = 0;
5207         key.type = BTRFS_ROOT_ITEM_KEY;
5208
5209         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5210         if (ret < 0) {
5211                 err = ret;
5212                 goto out;
5213         } else if (ret > 0) {
5214                 err = -ENOENT;
5215                 goto out;
5216         }
5217
5218         while (1) {
5219                 node = path.nodes[0];
5220                 slot = path.slots[0];
5221                 btrfs_item_key_to_cpu(node, &key, slot);
5222                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5223                         goto out;
5224                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5225                     fs_root_objectid(key.objectid)) {
5226                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5227                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5228                                                                        &key);
5229                         } else {
5230                                 key.offset = (u64)-1;
5231                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5232                         }
5233
5234                         if (IS_ERR(cur_root)) {
5235                                 error("Fail to read fs/subvol tree: %lld",
5236                                       key.objectid);
5237                                 err = -EIO;
5238                                 goto next;
5239                         }
5240
5241                         ret = check_fs_root_v2(cur_root, ext_ref);
5242                         err |= ret;
5243
5244                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5245                                 btrfs_free_fs_root(cur_root);
5246                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5247                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5248                         ret = check_root_ref(tree_root, &key, node, slot);
5249                         err |= ret;
5250                 }
5251 next:
5252                 ret = btrfs_next_item(tree_root, &path);
5253                 if (ret > 0)
5254                         goto out;
5255                 if (ret < 0) {
5256                         err = ret;
5257                         goto out;
5258                 }
5259         }
5260
5261 out:
5262         btrfs_release_path(&path);
5263         return err;
5264 }
5265
5266 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5267 {
5268         struct list_head *cur = rec->backrefs.next;
5269         struct extent_backref *back;
5270         struct tree_backref *tback;
5271         struct data_backref *dback;
5272         u64 found = 0;
5273         int err = 0;
5274
5275         while(cur != &rec->backrefs) {
5276                 back = to_extent_backref(cur);
5277                 cur = cur->next;
5278                 if (!back->found_extent_tree) {
5279                         err = 1;
5280                         if (!print_errs)
5281                                 goto out;
5282                         if (back->is_data) {
5283                                 dback = to_data_backref(back);
5284                                 fprintf(stderr, "Backref %llu %s %llu"
5285                                         " owner %llu offset %llu num_refs %lu"
5286                                         " not found in extent tree\n",
5287                                         (unsigned long long)rec->start,
5288                                         back->full_backref ?
5289                                         "parent" : "root",
5290                                         back->full_backref ?
5291                                         (unsigned long long)dback->parent:
5292                                         (unsigned long long)dback->root,
5293                                         (unsigned long long)dback->owner,
5294                                         (unsigned long long)dback->offset,
5295                                         (unsigned long)dback->num_refs);
5296                         } else {
5297                                 tback = to_tree_backref(back);
5298                                 fprintf(stderr, "Backref %llu parent %llu"
5299                                         " root %llu not found in extent tree\n",
5300                                         (unsigned long long)rec->start,
5301                                         (unsigned long long)tback->parent,
5302                                         (unsigned long long)tback->root);
5303                         }
5304                 }
5305                 if (!back->is_data && !back->found_ref) {
5306                         err = 1;
5307                         if (!print_errs)
5308                                 goto out;
5309                         tback = to_tree_backref(back);
5310                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5311                                 (unsigned long long)rec->start,
5312                                 back->full_backref ? "parent" : "root",
5313                                 back->full_backref ?
5314                                 (unsigned long long)tback->parent :
5315                                 (unsigned long long)tback->root, back);
5316                 }
5317                 if (back->is_data) {
5318                         dback = to_data_backref(back);
5319                         if (dback->found_ref != dback->num_refs) {
5320                                 err = 1;
5321                                 if (!print_errs)
5322                                         goto out;
5323                                 fprintf(stderr, "Incorrect local backref count"
5324                                         " on %llu %s %llu owner %llu"
5325                                         " offset %llu found %u wanted %u back %p\n",
5326                                         (unsigned long long)rec->start,
5327                                         back->full_backref ?
5328                                         "parent" : "root",
5329                                         back->full_backref ?
5330                                         (unsigned long long)dback->parent:
5331                                         (unsigned long long)dback->root,
5332                                         (unsigned long long)dback->owner,
5333                                         (unsigned long long)dback->offset,
5334                                         dback->found_ref, dback->num_refs, back);
5335                         }
5336                         if (dback->disk_bytenr != rec->start) {
5337                                 err = 1;
5338                                 if (!print_errs)
5339                                         goto out;
5340                                 fprintf(stderr, "Backref disk bytenr does not"
5341                                         " match extent record, bytenr=%llu, "
5342                                         "ref bytenr=%llu\n",
5343                                         (unsigned long long)rec->start,
5344                                         (unsigned long long)dback->disk_bytenr);
5345                         }
5346
5347                         if (dback->bytes != rec->nr) {
5348                                 err = 1;
5349                                 if (!print_errs)
5350                                         goto out;
5351                                 fprintf(stderr, "Backref bytes do not match "
5352                                         "extent backref, bytenr=%llu, ref "
5353                                         "bytes=%llu, backref bytes=%llu\n",
5354                                         (unsigned long long)rec->start,
5355                                         (unsigned long long)rec->nr,
5356                                         (unsigned long long)dback->bytes);
5357                         }
5358                 }
5359                 if (!back->is_data) {
5360                         found += 1;
5361                 } else {
5362                         dback = to_data_backref(back);
5363                         found += dback->found_ref;
5364                 }
5365         }
5366         if (found != rec->refs) {
5367                 err = 1;
5368                 if (!print_errs)
5369                         goto out;
5370                 fprintf(stderr, "Incorrect global backref count "
5371                         "on %llu found %llu wanted %llu\n",
5372                         (unsigned long long)rec->start,
5373                         (unsigned long long)found,
5374                         (unsigned long long)rec->refs);
5375         }
5376 out:
5377         return err;
5378 }
5379
5380 static int free_all_extent_backrefs(struct extent_record *rec)
5381 {
5382         struct extent_backref *back;
5383         struct list_head *cur;
5384         while (!list_empty(&rec->backrefs)) {
5385                 cur = rec->backrefs.next;
5386                 back = to_extent_backref(cur);
5387                 list_del(cur);
5388                 free(back);
5389         }
5390         return 0;
5391 }
5392
5393 static void free_extent_record_cache(struct cache_tree *extent_cache)
5394 {
5395         struct cache_extent *cache;
5396         struct extent_record *rec;
5397
5398         while (1) {
5399                 cache = first_cache_extent(extent_cache);
5400                 if (!cache)
5401                         break;
5402                 rec = container_of(cache, struct extent_record, cache);
5403                 remove_cache_extent(extent_cache, cache);
5404                 free_all_extent_backrefs(rec);
5405                 free(rec);
5406         }
5407 }
5408
5409 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5410                                  struct extent_record *rec)
5411 {
5412         if (rec->content_checked && rec->owner_ref_checked &&
5413             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5414             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5415             !rec->bad_full_backref && !rec->crossing_stripes &&
5416             !rec->wrong_chunk_type) {
5417                 remove_cache_extent(extent_cache, &rec->cache);
5418                 free_all_extent_backrefs(rec);
5419                 list_del_init(&rec->list);
5420                 free(rec);
5421         }
5422         return 0;
5423 }
5424
5425 static int check_owner_ref(struct btrfs_root *root,
5426                             struct extent_record *rec,
5427                             struct extent_buffer *buf)
5428 {
5429         struct extent_backref *node;
5430         struct tree_backref *back;
5431         struct btrfs_root *ref_root;
5432         struct btrfs_key key;
5433         struct btrfs_path path;
5434         struct extent_buffer *parent;
5435         int level;
5436         int found = 0;
5437         int ret;
5438
5439         list_for_each_entry(node, &rec->backrefs, list) {
5440                 if (node->is_data)
5441                         continue;
5442                 if (!node->found_ref)
5443                         continue;
5444                 if (node->full_backref)
5445                         continue;
5446                 back = to_tree_backref(node);
5447                 if (btrfs_header_owner(buf) == back->root)
5448                         return 0;
5449         }
5450         BUG_ON(rec->is_root);
5451
5452         /* try to find the block by search corresponding fs tree */
5453         key.objectid = btrfs_header_owner(buf);
5454         key.type = BTRFS_ROOT_ITEM_KEY;
5455         key.offset = (u64)-1;
5456
5457         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5458         if (IS_ERR(ref_root))
5459                 return 1;
5460
5461         level = btrfs_header_level(buf);
5462         if (level == 0)
5463                 btrfs_item_key_to_cpu(buf, &key, 0);
5464         else
5465                 btrfs_node_key_to_cpu(buf, &key, 0);
5466
5467         btrfs_init_path(&path);
5468         path.lowest_level = level + 1;
5469         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5470         if (ret < 0)
5471                 return 0;
5472
5473         parent = path.nodes[level + 1];
5474         if (parent && buf->start == btrfs_node_blockptr(parent,
5475                                                         path.slots[level + 1]))
5476                 found = 1;
5477
5478         btrfs_release_path(&path);
5479         return found ? 0 : 1;
5480 }
5481
5482 static int is_extent_tree_record(struct extent_record *rec)
5483 {
5484         struct list_head *cur = rec->backrefs.next;
5485         struct extent_backref *node;
5486         struct tree_backref *back;
5487         int is_extent = 0;
5488
5489         while(cur != &rec->backrefs) {
5490                 node = to_extent_backref(cur);
5491                 cur = cur->next;
5492                 if (node->is_data)
5493                         return 0;
5494                 back = to_tree_backref(node);
5495                 if (node->full_backref)
5496                         return 0;
5497                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5498                         is_extent = 1;
5499         }
5500         return is_extent;
5501 }
5502
5503
5504 static int record_bad_block_io(struct btrfs_fs_info *info,
5505                                struct cache_tree *extent_cache,
5506                                u64 start, u64 len)
5507 {
5508         struct extent_record *rec;
5509         struct cache_extent *cache;
5510         struct btrfs_key key;
5511
5512         cache = lookup_cache_extent(extent_cache, start, len);
5513         if (!cache)
5514                 return 0;
5515
5516         rec = container_of(cache, struct extent_record, cache);
5517         if (!is_extent_tree_record(rec))
5518                 return 0;
5519
5520         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5521         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5522 }
5523
5524 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5525                        struct extent_buffer *buf, int slot)
5526 {
5527         if (btrfs_header_level(buf)) {
5528                 struct btrfs_key_ptr ptr1, ptr2;
5529
5530                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5531                                    sizeof(struct btrfs_key_ptr));
5532                 read_extent_buffer(buf, &ptr2,
5533                                    btrfs_node_key_ptr_offset(slot + 1),
5534                                    sizeof(struct btrfs_key_ptr));
5535                 write_extent_buffer(buf, &ptr1,
5536                                     btrfs_node_key_ptr_offset(slot + 1),
5537                                     sizeof(struct btrfs_key_ptr));
5538                 write_extent_buffer(buf, &ptr2,
5539                                     btrfs_node_key_ptr_offset(slot),
5540                                     sizeof(struct btrfs_key_ptr));
5541                 if (slot == 0) {
5542                         struct btrfs_disk_key key;
5543                         btrfs_node_key(buf, &key, 0);
5544                         btrfs_fixup_low_keys(root, path, &key,
5545                                              btrfs_header_level(buf) + 1);
5546                 }
5547         } else {
5548                 struct btrfs_item *item1, *item2;
5549                 struct btrfs_key k1, k2;
5550                 char *item1_data, *item2_data;
5551                 u32 item1_offset, item2_offset, item1_size, item2_size;
5552
5553                 item1 = btrfs_item_nr(slot);
5554                 item2 = btrfs_item_nr(slot + 1);
5555                 btrfs_item_key_to_cpu(buf, &k1, slot);
5556                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5557                 item1_offset = btrfs_item_offset(buf, item1);
5558                 item2_offset = btrfs_item_offset(buf, item2);
5559                 item1_size = btrfs_item_size(buf, item1);
5560                 item2_size = btrfs_item_size(buf, item2);
5561
5562                 item1_data = malloc(item1_size);
5563                 if (!item1_data)
5564                         return -ENOMEM;
5565                 item2_data = malloc(item2_size);
5566                 if (!item2_data) {
5567                         free(item1_data);
5568                         return -ENOMEM;
5569                 }
5570
5571                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5572                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5573
5574                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5575                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5576                 free(item1_data);
5577                 free(item2_data);
5578
5579                 btrfs_set_item_offset(buf, item1, item2_offset);
5580                 btrfs_set_item_offset(buf, item2, item1_offset);
5581                 btrfs_set_item_size(buf, item1, item2_size);
5582                 btrfs_set_item_size(buf, item2, item1_size);
5583
5584                 path->slots[0] = slot;
5585                 btrfs_set_item_key_unsafe(root, path, &k2);
5586                 path->slots[0] = slot + 1;
5587                 btrfs_set_item_key_unsafe(root, path, &k1);
5588         }
5589         return 0;
5590 }
5591
5592 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5593 {
5594         struct extent_buffer *buf;
5595         struct btrfs_key k1, k2;
5596         int i;
5597         int level = path->lowest_level;
5598         int ret = -EIO;
5599
5600         buf = path->nodes[level];
5601         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5602                 if (level) {
5603                         btrfs_node_key_to_cpu(buf, &k1, i);
5604                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5605                 } else {
5606                         btrfs_item_key_to_cpu(buf, &k1, i);
5607                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5608                 }
5609                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5610                         continue;
5611                 ret = swap_values(root, path, buf, i);
5612                 if (ret)
5613                         break;
5614                 btrfs_mark_buffer_dirty(buf);
5615                 i = 0;
5616         }
5617         return ret;
5618 }
5619
5620 static int delete_bogus_item(struct btrfs_root *root,
5621                              struct btrfs_path *path,
5622                              struct extent_buffer *buf, int slot)
5623 {
5624         struct btrfs_key key;
5625         int nritems = btrfs_header_nritems(buf);
5626
5627         btrfs_item_key_to_cpu(buf, &key, slot);
5628
5629         /* These are all the keys we can deal with missing. */
5630         if (key.type != BTRFS_DIR_INDEX_KEY &&
5631             key.type != BTRFS_EXTENT_ITEM_KEY &&
5632             key.type != BTRFS_METADATA_ITEM_KEY &&
5633             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5634             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5635                 return -1;
5636
5637         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5638                (unsigned long long)key.objectid, key.type,
5639                (unsigned long long)key.offset, slot, buf->start);
5640         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5641                               btrfs_item_nr_offset(slot + 1),
5642                               sizeof(struct btrfs_item) *
5643                               (nritems - slot - 1));
5644         btrfs_set_header_nritems(buf, nritems - 1);
5645         if (slot == 0) {
5646                 struct btrfs_disk_key disk_key;
5647
5648                 btrfs_item_key(buf, &disk_key, 0);
5649                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5650         }
5651         btrfs_mark_buffer_dirty(buf);
5652         return 0;
5653 }
5654
5655 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5656 {
5657         struct extent_buffer *buf;
5658         int i;
5659         int ret = 0;
5660
5661         /* We should only get this for leaves */
5662         BUG_ON(path->lowest_level);
5663         buf = path->nodes[0];
5664 again:
5665         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5666                 unsigned int shift = 0, offset;
5667
5668                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5669                     BTRFS_LEAF_DATA_SIZE(root)) {
5670                         if (btrfs_item_end_nr(buf, i) >
5671                             BTRFS_LEAF_DATA_SIZE(root)) {
5672                                 ret = delete_bogus_item(root, path, buf, i);
5673                                 if (!ret)
5674                                         goto again;
5675                                 fprintf(stderr, "item is off the end of the "
5676                                         "leaf, can't fix\n");
5677                                 ret = -EIO;
5678                                 break;
5679                         }
5680                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5681                                 btrfs_item_end_nr(buf, i);
5682                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5683                            btrfs_item_offset_nr(buf, i - 1)) {
5684                         if (btrfs_item_end_nr(buf, i) >
5685                             btrfs_item_offset_nr(buf, i - 1)) {
5686                                 ret = delete_bogus_item(root, path, buf, i);
5687                                 if (!ret)
5688                                         goto again;
5689                                 fprintf(stderr, "items overlap, can't fix\n");
5690                                 ret = -EIO;
5691                                 break;
5692                         }
5693                         shift = btrfs_item_offset_nr(buf, i - 1) -
5694                                 btrfs_item_end_nr(buf, i);
5695                 }
5696                 if (!shift)
5697                         continue;
5698
5699                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5700                        i, shift, (unsigned long long)buf->start);
5701                 offset = btrfs_item_offset_nr(buf, i);
5702                 memmove_extent_buffer(buf,
5703                                       btrfs_leaf_data(buf) + offset + shift,
5704                                       btrfs_leaf_data(buf) + offset,
5705                                       btrfs_item_size_nr(buf, i));
5706                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5707                                       offset + shift);
5708                 btrfs_mark_buffer_dirty(buf);
5709         }
5710
5711         /*
5712          * We may have moved things, in which case we want to exit so we don't
5713          * write those changes out.  Once we have proper abort functionality in
5714          * progs this can be changed to something nicer.
5715          */
5716         BUG_ON(ret);
5717         return ret;
5718 }
5719
5720 /*
5721  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5722  * then just return -EIO.
5723  */
5724 static int try_to_fix_bad_block(struct btrfs_root *root,
5725                                 struct extent_buffer *buf,
5726                                 enum btrfs_tree_block_status status)
5727 {
5728         struct btrfs_trans_handle *trans;
5729         struct ulist *roots;
5730         struct ulist_node *node;
5731         struct btrfs_root *search_root;
5732         struct btrfs_path path;
5733         struct ulist_iterator iter;
5734         struct btrfs_key root_key, key;
5735         int ret;
5736
5737         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5738             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5739                 return -EIO;
5740
5741         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5742         if (ret)
5743                 return -EIO;
5744
5745         btrfs_init_path(&path);
5746         ULIST_ITER_INIT(&iter);
5747         while ((node = ulist_next(roots, &iter))) {
5748                 root_key.objectid = node->val;
5749                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5750                 root_key.offset = (u64)-1;
5751
5752                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5753                 if (IS_ERR(root)) {
5754                         ret = -EIO;
5755                         break;
5756                 }
5757
5758
5759                 trans = btrfs_start_transaction(search_root, 0);
5760                 if (IS_ERR(trans)) {
5761                         ret = PTR_ERR(trans);
5762                         break;
5763                 }
5764
5765                 path.lowest_level = btrfs_header_level(buf);
5766                 path.skip_check_block = 1;
5767                 if (path.lowest_level)
5768                         btrfs_node_key_to_cpu(buf, &key, 0);
5769                 else
5770                         btrfs_item_key_to_cpu(buf, &key, 0);
5771                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5772                 if (ret) {
5773                         ret = -EIO;
5774                         btrfs_commit_transaction(trans, search_root);
5775                         break;
5776                 }
5777                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5778                         ret = fix_key_order(search_root, &path);
5779                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780                         ret = fix_item_offset(search_root, &path);
5781                 if (ret) {
5782                         btrfs_commit_transaction(trans, search_root);
5783                         break;
5784                 }
5785                 btrfs_release_path(&path);
5786                 btrfs_commit_transaction(trans, search_root);
5787         }
5788         ulist_free(roots);
5789         btrfs_release_path(&path);
5790         return ret;
5791 }
5792
5793 static int check_block(struct btrfs_root *root,
5794                        struct cache_tree *extent_cache,
5795                        struct extent_buffer *buf, u64 flags)
5796 {
5797         struct extent_record *rec;
5798         struct cache_extent *cache;
5799         struct btrfs_key key;
5800         enum btrfs_tree_block_status status;
5801         int ret = 0;
5802         int level;
5803
5804         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5805         if (!cache)
5806                 return 1;
5807         rec = container_of(cache, struct extent_record, cache);
5808         rec->generation = btrfs_header_generation(buf);
5809
5810         level = btrfs_header_level(buf);
5811         if (btrfs_header_nritems(buf) > 0) {
5812
5813                 if (level == 0)
5814                         btrfs_item_key_to_cpu(buf, &key, 0);
5815                 else
5816                         btrfs_node_key_to_cpu(buf, &key, 0);
5817
5818                 rec->info_objectid = key.objectid;
5819         }
5820         rec->info_level = level;
5821
5822         if (btrfs_is_leaf(buf))
5823                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5824         else
5825                 status = btrfs_check_node(root, &rec->parent_key, buf);
5826
5827         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5828                 if (repair)
5829                         status = try_to_fix_bad_block(root, buf, status);
5830                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5831                         ret = -EIO;
5832                         fprintf(stderr, "bad block %llu\n",
5833                                 (unsigned long long)buf->start);
5834                 } else {
5835                         /*
5836                          * Signal to callers we need to start the scan over
5837                          * again since we'll have cowed blocks.
5838                          */
5839                         ret = -EAGAIN;
5840                 }
5841         } else {
5842                 rec->content_checked = 1;
5843                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5844                         rec->owner_ref_checked = 1;
5845                 else {
5846                         ret = check_owner_ref(root, rec, buf);
5847                         if (!ret)
5848                                 rec->owner_ref_checked = 1;
5849                 }
5850         }
5851         if (!ret)
5852                 maybe_free_extent_rec(extent_cache, rec);
5853         return ret;
5854 }
5855
5856 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5857                                                 u64 parent, u64 root)
5858 {
5859         struct list_head *cur = rec->backrefs.next;
5860         struct extent_backref *node;
5861         struct tree_backref *back;
5862
5863         while(cur != &rec->backrefs) {
5864                 node = to_extent_backref(cur);
5865                 cur = cur->next;
5866                 if (node->is_data)
5867                         continue;
5868                 back = to_tree_backref(node);
5869                 if (parent > 0) {
5870                         if (!node->full_backref)
5871                                 continue;
5872                         if (parent == back->parent)
5873                                 return back;
5874                 } else {
5875                         if (node->full_backref)
5876                                 continue;
5877                         if (back->root == root)
5878                                 return back;
5879                 }
5880         }
5881         return NULL;
5882 }
5883
5884 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5885                                                 u64 parent, u64 root)
5886 {
5887         struct tree_backref *ref = malloc(sizeof(*ref));
5888
5889         if (!ref)
5890                 return NULL;
5891         memset(&ref->node, 0, sizeof(ref->node));
5892         if (parent > 0) {
5893                 ref->parent = parent;
5894                 ref->node.full_backref = 1;
5895         } else {
5896                 ref->root = root;
5897                 ref->node.full_backref = 0;
5898         }
5899         list_add_tail(&ref->node.list, &rec->backrefs);
5900
5901         return ref;
5902 }
5903
5904 static struct data_backref *find_data_backref(struct extent_record *rec,
5905                                                 u64 parent, u64 root,
5906                                                 u64 owner, u64 offset,
5907                                                 int found_ref,
5908                                                 u64 disk_bytenr, u64 bytes)
5909 {
5910         struct list_head *cur = rec->backrefs.next;
5911         struct extent_backref *node;
5912         struct data_backref *back;
5913
5914         while(cur != &rec->backrefs) {
5915                 node = to_extent_backref(cur);
5916                 cur = cur->next;
5917                 if (!node->is_data)
5918                         continue;
5919                 back = to_data_backref(node);
5920                 if (parent > 0) {
5921                         if (!node->full_backref)
5922                                 continue;
5923                         if (parent == back->parent)
5924                                 return back;
5925                 } else {
5926                         if (node->full_backref)
5927                                 continue;
5928                         if (back->root == root && back->owner == owner &&
5929                             back->offset == offset) {
5930                                 if (found_ref && node->found_ref &&
5931                                     (back->bytes != bytes ||
5932                                     back->disk_bytenr != disk_bytenr))
5933                                         continue;
5934                                 return back;
5935                         }
5936                 }
5937         }
5938         return NULL;
5939 }
5940
5941 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5942                                                 u64 parent, u64 root,
5943                                                 u64 owner, u64 offset,
5944                                                 u64 max_size)
5945 {
5946         struct data_backref *ref = malloc(sizeof(*ref));
5947
5948         if (!ref)
5949                 return NULL;
5950         memset(&ref->node, 0, sizeof(ref->node));
5951         ref->node.is_data = 1;
5952
5953         if (parent > 0) {
5954                 ref->parent = parent;
5955                 ref->owner = 0;
5956                 ref->offset = 0;
5957                 ref->node.full_backref = 1;
5958         } else {
5959                 ref->root = root;
5960                 ref->owner = owner;
5961                 ref->offset = offset;
5962                 ref->node.full_backref = 0;
5963         }
5964         ref->bytes = max_size;
5965         ref->found_ref = 0;
5966         ref->num_refs = 0;
5967         list_add_tail(&ref->node.list, &rec->backrefs);
5968         if (max_size > rec->max_size)
5969                 rec->max_size = max_size;
5970         return ref;
5971 }
5972
5973 /* Check if the type of extent matches with its chunk */
5974 static void check_extent_type(struct extent_record *rec)
5975 {
5976         struct btrfs_block_group_cache *bg_cache;
5977
5978         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5979         if (!bg_cache)
5980                 return;
5981
5982         /* data extent, check chunk directly*/
5983         if (!rec->metadata) {
5984                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5985                         rec->wrong_chunk_type = 1;
5986                 return;
5987         }
5988
5989         /* metadata extent, check the obvious case first */
5990         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5991                                  BTRFS_BLOCK_GROUP_METADATA))) {
5992                 rec->wrong_chunk_type = 1;
5993                 return;
5994         }
5995
5996         /*
5997          * Check SYSTEM extent, as it's also marked as metadata, we can only
5998          * make sure it's a SYSTEM extent by its backref
5999          */
6000         if (!list_empty(&rec->backrefs)) {
6001                 struct extent_backref *node;
6002                 struct tree_backref *tback;
6003                 u64 bg_type;
6004
6005                 node = to_extent_backref(rec->backrefs.next);
6006                 if (node->is_data) {
6007                         /* tree block shouldn't have data backref */
6008                         rec->wrong_chunk_type = 1;
6009                         return;
6010                 }
6011                 tback = container_of(node, struct tree_backref, node);
6012
6013                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6014                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6015                 else
6016                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6017                 if (!(bg_cache->flags & bg_type))
6018                         rec->wrong_chunk_type = 1;
6019         }
6020 }
6021
6022 /*
6023  * Allocate a new extent record, fill default values from @tmpl and insert int
6024  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6025  * the cache, otherwise it fails.
6026  */
6027 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6028                 struct extent_record *tmpl)
6029 {
6030         struct extent_record *rec;
6031         int ret = 0;
6032
6033         BUG_ON(tmpl->max_size == 0);
6034         rec = malloc(sizeof(*rec));
6035         if (!rec)
6036                 return -ENOMEM;
6037         rec->start = tmpl->start;
6038         rec->max_size = tmpl->max_size;
6039         rec->nr = max(tmpl->nr, tmpl->max_size);
6040         rec->found_rec = tmpl->found_rec;
6041         rec->content_checked = tmpl->content_checked;
6042         rec->owner_ref_checked = tmpl->owner_ref_checked;
6043         rec->num_duplicates = 0;
6044         rec->metadata = tmpl->metadata;
6045         rec->flag_block_full_backref = FLAG_UNSET;
6046         rec->bad_full_backref = 0;
6047         rec->crossing_stripes = 0;
6048         rec->wrong_chunk_type = 0;
6049         rec->is_root = tmpl->is_root;
6050         rec->refs = tmpl->refs;
6051         rec->extent_item_refs = tmpl->extent_item_refs;
6052         rec->parent_generation = tmpl->parent_generation;
6053         INIT_LIST_HEAD(&rec->backrefs);
6054         INIT_LIST_HEAD(&rec->dups);
6055         INIT_LIST_HEAD(&rec->list);
6056         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6057         rec->cache.start = tmpl->start;
6058         rec->cache.size = tmpl->nr;
6059         ret = insert_cache_extent(extent_cache, &rec->cache);
6060         if (ret) {
6061                 free(rec);
6062                 return ret;
6063         }
6064         bytes_used += rec->nr;
6065
6066         if (tmpl->metadata)
6067                 rec->crossing_stripes = check_crossing_stripes(global_info,
6068                                 rec->start, global_info->tree_root->nodesize);
6069         check_extent_type(rec);
6070         return ret;
6071 }
6072
6073 /*
6074  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6075  * some are hints:
6076  * - refs              - if found, increase refs
6077  * - is_root           - if found, set
6078  * - content_checked   - if found, set
6079  * - owner_ref_checked - if found, set
6080  *
6081  * If not found, create a new one, initialize and insert.
6082  */
6083 static int add_extent_rec(struct cache_tree *extent_cache,
6084                 struct extent_record *tmpl)
6085 {
6086         struct extent_record *rec;
6087         struct cache_extent *cache;
6088         int ret = 0;
6089         int dup = 0;
6090
6091         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6092         if (cache) {
6093                 rec = container_of(cache, struct extent_record, cache);
6094                 if (tmpl->refs)
6095                         rec->refs++;
6096                 if (rec->nr == 1)
6097                         rec->nr = max(tmpl->nr, tmpl->max_size);
6098
6099                 /*
6100                  * We need to make sure to reset nr to whatever the extent
6101                  * record says was the real size, this way we can compare it to
6102                  * the backrefs.
6103                  */
6104                 if (tmpl->found_rec) {
6105                         if (tmpl->start != rec->start || rec->found_rec) {
6106                                 struct extent_record *tmp;
6107
6108                                 dup = 1;
6109                                 if (list_empty(&rec->list))
6110                                         list_add_tail(&rec->list,
6111                                                       &duplicate_extents);
6112
6113                                 /*
6114                                  * We have to do this song and dance in case we
6115                                  * find an extent record that falls inside of
6116                                  * our current extent record but does not have
6117                                  * the same objectid.
6118                                  */
6119                                 tmp = malloc(sizeof(*tmp));
6120                                 if (!tmp)
6121                                         return -ENOMEM;
6122                                 tmp->start = tmpl->start;
6123                                 tmp->max_size = tmpl->max_size;
6124                                 tmp->nr = tmpl->nr;
6125                                 tmp->found_rec = 1;
6126                                 tmp->metadata = tmpl->metadata;
6127                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6128                                 INIT_LIST_HEAD(&tmp->list);
6129                                 list_add_tail(&tmp->list, &rec->dups);
6130                                 rec->num_duplicates++;
6131                         } else {
6132                                 rec->nr = tmpl->nr;
6133                                 rec->found_rec = 1;
6134                         }
6135                 }
6136
6137                 if (tmpl->extent_item_refs && !dup) {
6138                         if (rec->extent_item_refs) {
6139                                 fprintf(stderr, "block %llu rec "
6140                                         "extent_item_refs %llu, passed %llu\n",
6141                                         (unsigned long long)tmpl->start,
6142                                         (unsigned long long)
6143                                                         rec->extent_item_refs,
6144                                         (unsigned long long)tmpl->extent_item_refs);
6145                         }
6146                         rec->extent_item_refs = tmpl->extent_item_refs;
6147                 }
6148                 if (tmpl->is_root)
6149                         rec->is_root = 1;
6150                 if (tmpl->content_checked)
6151                         rec->content_checked = 1;
6152                 if (tmpl->owner_ref_checked)
6153                         rec->owner_ref_checked = 1;
6154                 memcpy(&rec->parent_key, &tmpl->parent_key,
6155                                 sizeof(tmpl->parent_key));
6156                 if (tmpl->parent_generation)
6157                         rec->parent_generation = tmpl->parent_generation;
6158                 if (rec->max_size < tmpl->max_size)
6159                         rec->max_size = tmpl->max_size;
6160
6161                 /*
6162                  * A metadata extent can't cross stripe_len boundary, otherwise
6163                  * kernel scrub won't be able to handle it.
6164                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6165                  * it.
6166                  */
6167                 if (tmpl->metadata)
6168                         rec->crossing_stripes = check_crossing_stripes(
6169                                         global_info, rec->start,
6170                                         global_info->tree_root->nodesize);
6171                 check_extent_type(rec);
6172                 maybe_free_extent_rec(extent_cache, rec);
6173                 return ret;
6174         }
6175
6176         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6177
6178         return ret;
6179 }
6180
6181 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6182                             u64 parent, u64 root, int found_ref)
6183 {
6184         struct extent_record *rec;
6185         struct tree_backref *back;
6186         struct cache_extent *cache;
6187         int ret;
6188
6189         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6190         if (!cache) {
6191                 struct extent_record tmpl;
6192
6193                 memset(&tmpl, 0, sizeof(tmpl));
6194                 tmpl.start = bytenr;
6195                 tmpl.nr = 1;
6196                 tmpl.metadata = 1;
6197
6198                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6199                 if (ret)
6200                         return ret;
6201
6202                 /* really a bug in cache_extent implement now */
6203                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6204                 if (!cache)
6205                         return -ENOENT;
6206         }
6207
6208         rec = container_of(cache, struct extent_record, cache);
6209         if (rec->start != bytenr) {
6210                 /*
6211                  * Several cause, from unaligned bytenr to over lapping extents
6212                  */
6213                 return -EEXIST;
6214         }
6215
6216         back = find_tree_backref(rec, parent, root);
6217         if (!back) {
6218                 back = alloc_tree_backref(rec, parent, root);
6219                 if (!back)
6220                         return -ENOMEM;
6221         }
6222
6223         if (found_ref) {
6224                 if (back->node.found_ref) {
6225                         fprintf(stderr, "Extent back ref already exists "
6226                                 "for %llu parent %llu root %llu \n",
6227                                 (unsigned long long)bytenr,
6228                                 (unsigned long long)parent,
6229                                 (unsigned long long)root);
6230                 }
6231                 back->node.found_ref = 1;
6232         } else {
6233                 if (back->node.found_extent_tree) {
6234                         fprintf(stderr, "Extent back ref already exists "
6235                                 "for %llu parent %llu root %llu \n",
6236                                 (unsigned long long)bytenr,
6237                                 (unsigned long long)parent,
6238                                 (unsigned long long)root);
6239                 }
6240                 back->node.found_extent_tree = 1;
6241         }
6242         check_extent_type(rec);
6243         maybe_free_extent_rec(extent_cache, rec);
6244         return 0;
6245 }
6246
6247 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6248                             u64 parent, u64 root, u64 owner, u64 offset,
6249                             u32 num_refs, int found_ref, u64 max_size)
6250 {
6251         struct extent_record *rec;
6252         struct data_backref *back;
6253         struct cache_extent *cache;
6254         int ret;
6255
6256         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6257         if (!cache) {
6258                 struct extent_record tmpl;
6259
6260                 memset(&tmpl, 0, sizeof(tmpl));
6261                 tmpl.start = bytenr;
6262                 tmpl.nr = 1;
6263                 tmpl.max_size = max_size;
6264
6265                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6266                 if (ret)
6267                         return ret;
6268
6269                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6270                 if (!cache)
6271                         abort();
6272         }
6273
6274         rec = container_of(cache, struct extent_record, cache);
6275         if (rec->max_size < max_size)
6276                 rec->max_size = max_size;
6277
6278         /*
6279          * If found_ref is set then max_size is the real size and must match the
6280          * existing refs.  So if we have already found a ref then we need to
6281          * make sure that this ref matches the existing one, otherwise we need
6282          * to add a new backref so we can notice that the backrefs don't match
6283          * and we need to figure out who is telling the truth.  This is to
6284          * account for that awful fsync bug I introduced where we'd end up with
6285          * a btrfs_file_extent_item that would have its length include multiple
6286          * prealloc extents or point inside of a prealloc extent.
6287          */
6288         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6289                                  bytenr, max_size);
6290         if (!back) {
6291                 back = alloc_data_backref(rec, parent, root, owner, offset,
6292                                           max_size);
6293                 BUG_ON(!back);
6294         }
6295
6296         if (found_ref) {
6297                 BUG_ON(num_refs != 1);
6298                 if (back->node.found_ref)
6299                         BUG_ON(back->bytes != max_size);
6300                 back->node.found_ref = 1;
6301                 back->found_ref += 1;
6302                 back->bytes = max_size;
6303                 back->disk_bytenr = bytenr;
6304                 rec->refs += 1;
6305                 rec->content_checked = 1;
6306                 rec->owner_ref_checked = 1;
6307         } else {
6308                 if (back->node.found_extent_tree) {
6309                         fprintf(stderr, "Extent back ref already exists "
6310                                 "for %llu parent %llu root %llu "
6311                                 "owner %llu offset %llu num_refs %lu\n",
6312                                 (unsigned long long)bytenr,
6313                                 (unsigned long long)parent,
6314                                 (unsigned long long)root,
6315                                 (unsigned long long)owner,
6316                                 (unsigned long long)offset,
6317                                 (unsigned long)num_refs);
6318                 }
6319                 back->num_refs = num_refs;
6320                 back->node.found_extent_tree = 1;
6321         }
6322         maybe_free_extent_rec(extent_cache, rec);
6323         return 0;
6324 }
6325
6326 static int add_pending(struct cache_tree *pending,
6327                        struct cache_tree *seen, u64 bytenr, u32 size)
6328 {
6329         int ret;
6330         ret = add_cache_extent(seen, bytenr, size);
6331         if (ret)
6332                 return ret;
6333         add_cache_extent(pending, bytenr, size);
6334         return 0;
6335 }
6336
6337 static int pick_next_pending(struct cache_tree *pending,
6338                         struct cache_tree *reada,
6339                         struct cache_tree *nodes,
6340                         u64 last, struct block_info *bits, int bits_nr,
6341                         int *reada_bits)
6342 {
6343         unsigned long node_start = last;
6344         struct cache_extent *cache;
6345         int ret;
6346
6347         cache = search_cache_extent(reada, 0);
6348         if (cache) {
6349                 bits[0].start = cache->start;
6350                 bits[0].size = cache->size;
6351                 *reada_bits = 1;
6352                 return 1;
6353         }
6354         *reada_bits = 0;
6355         if (node_start > 32768)
6356                 node_start -= 32768;
6357
6358         cache = search_cache_extent(nodes, node_start);
6359         if (!cache)
6360                 cache = search_cache_extent(nodes, 0);
6361
6362         if (!cache) {
6363                  cache = search_cache_extent(pending, 0);
6364                  if (!cache)
6365                          return 0;
6366                  ret = 0;
6367                  do {
6368                          bits[ret].start = cache->start;
6369                          bits[ret].size = cache->size;
6370                          cache = next_cache_extent(cache);
6371                          ret++;
6372                  } while (cache && ret < bits_nr);
6373                  return ret;
6374         }
6375
6376         ret = 0;
6377         do {
6378                 bits[ret].start = cache->start;
6379                 bits[ret].size = cache->size;
6380                 cache = next_cache_extent(cache);
6381                 ret++;
6382         } while (cache && ret < bits_nr);
6383
6384         if (bits_nr - ret > 8) {
6385                 u64 lookup = bits[0].start + bits[0].size;
6386                 struct cache_extent *next;
6387                 next = search_cache_extent(pending, lookup);
6388                 while(next) {
6389                         if (next->start - lookup > 32768)
6390                                 break;
6391                         bits[ret].start = next->start;
6392                         bits[ret].size = next->size;
6393                         lookup = next->start + next->size;
6394                         ret++;
6395                         if (ret == bits_nr)
6396                                 break;
6397                         next = next_cache_extent(next);
6398                         if (!next)
6399                                 break;
6400                 }
6401         }
6402         return ret;
6403 }
6404
6405 static void free_chunk_record(struct cache_extent *cache)
6406 {
6407         struct chunk_record *rec;
6408
6409         rec = container_of(cache, struct chunk_record, cache);
6410         list_del_init(&rec->list);
6411         list_del_init(&rec->dextents);
6412         free(rec);
6413 }
6414
6415 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6416 {
6417         cache_tree_free_extents(chunk_cache, free_chunk_record);
6418 }
6419
6420 static void free_device_record(struct rb_node *node)
6421 {
6422         struct device_record *rec;
6423
6424         rec = container_of(node, struct device_record, node);
6425         free(rec);
6426 }
6427
6428 FREE_RB_BASED_TREE(device_cache, free_device_record);
6429
6430 int insert_block_group_record(struct block_group_tree *tree,
6431                               struct block_group_record *bg_rec)
6432 {
6433         int ret;
6434
6435         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6436         if (ret)
6437                 return ret;
6438
6439         list_add_tail(&bg_rec->list, &tree->block_groups);
6440         return 0;
6441 }
6442
6443 static void free_block_group_record(struct cache_extent *cache)
6444 {
6445         struct block_group_record *rec;
6446
6447         rec = container_of(cache, struct block_group_record, cache);
6448         list_del_init(&rec->list);
6449         free(rec);
6450 }
6451
6452 void free_block_group_tree(struct block_group_tree *tree)
6453 {
6454         cache_tree_free_extents(&tree->tree, free_block_group_record);
6455 }
6456
6457 int insert_device_extent_record(struct device_extent_tree *tree,
6458                                 struct device_extent_record *de_rec)
6459 {
6460         int ret;
6461
6462         /*
6463          * Device extent is a bit different from the other extents, because
6464          * the extents which belong to the different devices may have the
6465          * same start and size, so we need use the special extent cache
6466          * search/insert functions.
6467          */
6468         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6469         if (ret)
6470                 return ret;
6471
6472         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6473         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6474         return 0;
6475 }
6476
6477 static void free_device_extent_record(struct cache_extent *cache)
6478 {
6479         struct device_extent_record *rec;
6480
6481         rec = container_of(cache, struct device_extent_record, cache);
6482         if (!list_empty(&rec->chunk_list))
6483                 list_del_init(&rec->chunk_list);
6484         if (!list_empty(&rec->device_list))
6485                 list_del_init(&rec->device_list);
6486         free(rec);
6487 }
6488
6489 void free_device_extent_tree(struct device_extent_tree *tree)
6490 {
6491         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6492 }
6493
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record a v0 extent ref item found at @slot of @leaf.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs, all others as data backrefs carrying the item's ref count.
 * In both cases key.objectid is the extent bytenr and key.offset the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
				key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6514
6515 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6516                                             struct btrfs_key *key,
6517                                             int slot)
6518 {
6519         struct btrfs_chunk *ptr;
6520         struct chunk_record *rec;
6521         int num_stripes, i;
6522
6523         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6524         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6525
6526         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6527         if (!rec) {
6528                 fprintf(stderr, "memory allocation failed\n");
6529                 exit(-1);
6530         }
6531
6532         INIT_LIST_HEAD(&rec->list);
6533         INIT_LIST_HEAD(&rec->dextents);
6534         rec->bg_rec = NULL;
6535
6536         rec->cache.start = key->offset;
6537         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6538
6539         rec->generation = btrfs_header_generation(leaf);
6540
6541         rec->objectid = key->objectid;
6542         rec->type = key->type;
6543         rec->offset = key->offset;
6544
6545         rec->length = rec->cache.size;
6546         rec->owner = btrfs_chunk_owner(leaf, ptr);
6547         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6548         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6549         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6550         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6551         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6552         rec->num_stripes = num_stripes;
6553         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6554
6555         for (i = 0; i < rec->num_stripes; ++i) {
6556                 rec->stripes[i].devid =
6557                         btrfs_stripe_devid_nr(leaf, ptr, i);
6558                 rec->stripes[i].offset =
6559                         btrfs_stripe_offset_nr(leaf, ptr, i);
6560                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6561                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6562                                 BTRFS_UUID_SIZE);
6563         }
6564
6565         return rec;
6566 }
6567
6568 static int process_chunk_item(struct cache_tree *chunk_cache,
6569                               struct btrfs_key *key, struct extent_buffer *eb,
6570                               int slot)
6571 {
6572         struct chunk_record *rec;
6573         struct btrfs_chunk *chunk;
6574         int ret = 0;
6575
6576         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6577         /*
6578          * Do extra check for this chunk item,
6579          *
6580          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6581          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6582          * and owner<->key_type check.
6583          */
6584         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6585                                       key->offset);
6586         if (ret < 0) {
6587                 error("chunk(%llu, %llu) is not valid, ignore it",
6588                       key->offset, btrfs_chunk_length(eb, chunk));
6589                 return 0;
6590         }
6591         rec = btrfs_new_chunk_record(eb, key, slot);
6592         ret = insert_cache_extent(chunk_cache, &rec->cache);
6593         if (ret) {
6594                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6595                         rec->offset, rec->length);
6596                 free(rec);
6597         }
6598
6599         return ret;
6600 }
6601
6602 static int process_device_item(struct rb_root *dev_cache,
6603                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6604 {
6605         struct btrfs_dev_item *ptr;
6606         struct device_record *rec;
6607         int ret = 0;
6608
6609         ptr = btrfs_item_ptr(eb,
6610                 slot, struct btrfs_dev_item);
6611
6612         rec = malloc(sizeof(*rec));
6613         if (!rec) {
6614                 fprintf(stderr, "memory allocation failed\n");
6615                 return -ENOMEM;
6616         }
6617
6618         rec->devid = key->offset;
6619         rec->generation = btrfs_header_generation(eb);
6620
6621         rec->objectid = key->objectid;
6622         rec->type = key->type;
6623         rec->offset = key->offset;
6624
6625         rec->devid = btrfs_device_id(eb, ptr);
6626         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6627         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6628
6629         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6630         if (ret) {
6631                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6632                 free(rec);
6633         }
6634
6635         return ret;
6636 }
6637
6638 struct block_group_record *
6639 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6640                              int slot)
6641 {
6642         struct btrfs_block_group_item *ptr;
6643         struct block_group_record *rec;
6644
6645         rec = calloc(1, sizeof(*rec));
6646         if (!rec) {
6647                 fprintf(stderr, "memory allocation failed\n");
6648                 exit(-1);
6649         }
6650
6651         rec->cache.start = key->objectid;
6652         rec->cache.size = key->offset;
6653
6654         rec->generation = btrfs_header_generation(leaf);
6655
6656         rec->objectid = key->objectid;
6657         rec->type = key->type;
6658         rec->offset = key->offset;
6659
6660         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6661         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6662
6663         INIT_LIST_HEAD(&rec->list);
6664
6665         return rec;
6666 }
6667
6668 static int process_block_group_item(struct block_group_tree *block_group_cache,
6669                                     struct btrfs_key *key,
6670                                     struct extent_buffer *eb, int slot)
6671 {
6672         struct block_group_record *rec;
6673         int ret = 0;
6674
6675         rec = btrfs_new_block_group_record(eb, key, slot);
6676         ret = insert_block_group_record(block_group_cache, rec);
6677         if (ret) {
6678                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6679                         rec->objectid, rec->offset);
6680                 free(rec);
6681         }
6682
6683         return ret;
6684 }
6685
6686 struct device_extent_record *
6687 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6688                                struct btrfs_key *key, int slot)
6689 {
6690         struct device_extent_record *rec;
6691         struct btrfs_dev_extent *ptr;
6692
6693         rec = calloc(1, sizeof(*rec));
6694         if (!rec) {
6695                 fprintf(stderr, "memory allocation failed\n");
6696                 exit(-1);
6697         }
6698
6699         rec->cache.objectid = key->objectid;
6700         rec->cache.start = key->offset;
6701
6702         rec->generation = btrfs_header_generation(leaf);
6703
6704         rec->objectid = key->objectid;
6705         rec->type = key->type;
6706         rec->offset = key->offset;
6707
6708         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6709         rec->chunk_objecteid =
6710                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6711         rec->chunk_offset =
6712                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6713         rec->length = btrfs_dev_extent_length(leaf, ptr);
6714         rec->cache.size = rec->length;
6715
6716         INIT_LIST_HEAD(&rec->chunk_list);
6717         INIT_LIST_HEAD(&rec->device_list);
6718
6719         return rec;
6720 }
6721
6722 static int
6723 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6724                            struct btrfs_key *key, struct extent_buffer *eb,
6725                            int slot)
6726 {
6727         struct device_extent_record *rec;
6728         int ret;
6729
6730         rec = btrfs_new_device_extent_record(eb, key, slot);
6731         ret = insert_device_extent_record(dev_extent_cache, rec);
6732         if (ret) {
6733                 fprintf(stderr,
6734                         "Device extent[%llu, %llu, %llu] existed.\n",
6735                         rec->objectid, rec->offset, rec->length);
6736                 free(rec);
6737         }
6738
6739         return ret;
6740 }
6741
6742 static int process_extent_item(struct btrfs_root *root,
6743                                struct cache_tree *extent_cache,
6744                                struct extent_buffer *eb, int slot)
6745 {
6746         struct btrfs_extent_item *ei;
6747         struct btrfs_extent_inline_ref *iref;
6748         struct btrfs_extent_data_ref *dref;
6749         struct btrfs_shared_data_ref *sref;
6750         struct btrfs_key key;
6751         struct extent_record tmpl;
6752         unsigned long end;
6753         unsigned long ptr;
6754         int ret;
6755         int type;
6756         u32 item_size = btrfs_item_size_nr(eb, slot);
6757         u64 refs = 0;
6758         u64 offset;
6759         u64 num_bytes;
6760         int metadata = 0;
6761
6762         btrfs_item_key_to_cpu(eb, &key, slot);
6763
6764         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6765                 metadata = 1;
6766                 num_bytes = root->nodesize;
6767         } else {
6768                 num_bytes = key.offset;
6769         }
6770
6771         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6772                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6773                       key.objectid, root->sectorsize);
6774                 return -EIO;
6775         }
6776         if (item_size < sizeof(*ei)) {
6777 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6778                 struct btrfs_extent_item_v0 *ei0;
6779                 BUG_ON(item_size != sizeof(*ei0));
6780                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6781                 refs = btrfs_extent_refs_v0(eb, ei0);
6782 #else
6783                 BUG();
6784 #endif
6785                 memset(&tmpl, 0, sizeof(tmpl));
6786                 tmpl.start = key.objectid;
6787                 tmpl.nr = num_bytes;
6788                 tmpl.extent_item_refs = refs;
6789                 tmpl.metadata = metadata;
6790                 tmpl.found_rec = 1;
6791                 tmpl.max_size = num_bytes;
6792
6793                 return add_extent_rec(extent_cache, &tmpl);
6794         }
6795
6796         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6797         refs = btrfs_extent_refs(eb, ei);
6798         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6799                 metadata = 1;
6800         else
6801                 metadata = 0;
6802         if (metadata && num_bytes != root->nodesize) {
6803                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6804                       num_bytes, root->nodesize);
6805                 return -EIO;
6806         }
6807         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6808                 error("ignore invalid data extent, length %llu is not aligned to %u",
6809                       num_bytes, root->sectorsize);
6810                 return -EIO;
6811         }
6812
6813         memset(&tmpl, 0, sizeof(tmpl));
6814         tmpl.start = key.objectid;
6815         tmpl.nr = num_bytes;
6816         tmpl.extent_item_refs = refs;
6817         tmpl.metadata = metadata;
6818         tmpl.found_rec = 1;
6819         tmpl.max_size = num_bytes;
6820         add_extent_rec(extent_cache, &tmpl);
6821
6822         ptr = (unsigned long)(ei + 1);
6823         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6824             key.type == BTRFS_EXTENT_ITEM_KEY)
6825                 ptr += sizeof(struct btrfs_tree_block_info);
6826
6827         end = (unsigned long)ei + item_size;
6828         while (ptr < end) {
6829                 iref = (struct btrfs_extent_inline_ref *)ptr;
6830                 type = btrfs_extent_inline_ref_type(eb, iref);
6831                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6832                 switch (type) {
6833                 case BTRFS_TREE_BLOCK_REF_KEY:
6834                         ret = add_tree_backref(extent_cache, key.objectid,
6835                                         0, offset, 0);
6836                         if (ret < 0)
6837                                 error(
6838                         "add_tree_backref failed (extent items tree block): %s",
6839                                       strerror(-ret));
6840                         break;
6841                 case BTRFS_SHARED_BLOCK_REF_KEY:
6842                         ret = add_tree_backref(extent_cache, key.objectid,
6843                                         offset, 0, 0);
6844                         if (ret < 0)
6845                                 error(
6846                         "add_tree_backref failed (extent items shared block): %s",
6847                                       strerror(-ret));
6848                         break;
6849                 case BTRFS_EXTENT_DATA_REF_KEY:
6850                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6851                         add_data_backref(extent_cache, key.objectid, 0,
6852                                         btrfs_extent_data_ref_root(eb, dref),
6853                                         btrfs_extent_data_ref_objectid(eb,
6854                                                                        dref),
6855                                         btrfs_extent_data_ref_offset(eb, dref),
6856                                         btrfs_extent_data_ref_count(eb, dref),
6857                                         0, num_bytes);
6858                         break;
6859                 case BTRFS_SHARED_DATA_REF_KEY:
6860                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6861                         add_data_backref(extent_cache, key.objectid, offset,
6862                                         0, 0, 0,
6863                                         btrfs_shared_data_ref_count(eb, sref),
6864                                         0, num_bytes);
6865                         break;
6866                 default:
6867                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6868                                 key.objectid, key.type, num_bytes);
6869                         goto out;
6870                 }
6871                 ptr += btrfs_extent_inline_ref_size(type);
6872         }
6873         WARN_ON(ptr > end);
6874 out:
6875         return 0;
6876 }
6877
6878 static int check_cache_range(struct btrfs_root *root,
6879                              struct btrfs_block_group_cache *cache,
6880                              u64 offset, u64 bytes)
6881 {
6882         struct btrfs_free_space *entry;
6883         u64 *logical;
6884         u64 bytenr;
6885         int stripe_len;
6886         int i, nr, ret;
6887
6888         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6889                 bytenr = btrfs_sb_offset(i);
6890                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6891                                        cache->key.objectid, bytenr, 0,
6892                                        &logical, &nr, &stripe_len);
6893                 if (ret)
6894                         return ret;
6895
6896                 while (nr--) {
6897                         if (logical[nr] + stripe_len <= offset)
6898                                 continue;
6899                         if (offset + bytes <= logical[nr])
6900                                 continue;
6901                         if (logical[nr] == offset) {
6902                                 if (stripe_len >= bytes) {
6903                                         free(logical);
6904                                         return 0;
6905                                 }
6906                                 bytes -= stripe_len;
6907                                 offset += stripe_len;
6908                         } else if (logical[nr] < offset) {
6909                                 if (logical[nr] + stripe_len >=
6910                                     offset + bytes) {
6911                                         free(logical);
6912                                         return 0;
6913                                 }
6914                                 bytes = (offset + bytes) -
6915                                         (logical[nr] + stripe_len);
6916                                 offset = logical[nr] + stripe_len;
6917                         } else {
6918                                 /*
6919                                  * Could be tricky, the super may land in the
6920                                  * middle of the area we're checking.  First
6921                                  * check the easiest case, it's at the end.
6922                                  */
6923                                 if (logical[nr] + stripe_len >=
6924                                     bytes + offset) {
6925                                         bytes = logical[nr] - offset;
6926                                         continue;
6927                                 }
6928
6929                                 /* Check the left side */
6930                                 ret = check_cache_range(root, cache,
6931                                                         offset,
6932                                                         logical[nr] - offset);
6933                                 if (ret) {
6934                                         free(logical);
6935                                         return ret;
6936                                 }
6937
6938                                 /* Now we continue with the right side */
6939                                 bytes = (offset + bytes) -
6940                                         (logical[nr] + stripe_len);
6941                                 offset = logical[nr] + stripe_len;
6942                         }
6943                 }
6944
6945                 free(logical);
6946         }
6947
6948         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6949         if (!entry) {
6950                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6951                         offset, offset+bytes);
6952                 return -EINVAL;
6953         }
6954
6955         if (entry->offset != offset) {
6956                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6957                         entry->offset);
6958                 return -EINVAL;
6959         }
6960
6961         if (entry->bytes != bytes) {
6962                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6963                         bytes, entry->bytes, offset);
6964                 return -EINVAL;
6965         }
6966
6967         unlink_free_space(cache->free_space_ctl, entry);
6968         free(entry);
6969         return 0;
6970 }
6971
6972 static int verify_space_cache(struct btrfs_root *root,
6973                               struct btrfs_block_group_cache *cache)
6974 {
6975         struct btrfs_path path;
6976         struct extent_buffer *leaf;
6977         struct btrfs_key key;
6978         u64 last;
6979         int ret = 0;
6980
6981         root = root->fs_info->extent_root;
6982
6983         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6984
6985         btrfs_init_path(&path);
6986         key.objectid = last;
6987         key.offset = 0;
6988         key.type = BTRFS_EXTENT_ITEM_KEY;
6989         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6990         if (ret < 0)
6991                 goto out;
6992         ret = 0;
6993         while (1) {
6994                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6995                         ret = btrfs_next_leaf(root, &path);
6996                         if (ret < 0)
6997                                 goto out;
6998                         if (ret > 0) {
6999                                 ret = 0;
7000                                 break;
7001                         }
7002                 }
7003                 leaf = path.nodes[0];
7004                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7005                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7006                         break;
7007                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7008                     key.type != BTRFS_METADATA_ITEM_KEY) {
7009                         path.slots[0]++;
7010                         continue;
7011                 }
7012
7013                 if (last == key.objectid) {
7014                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7015                                 last = key.objectid + key.offset;
7016                         else
7017                                 last = key.objectid + root->nodesize;
7018                         path.slots[0]++;
7019                         continue;
7020                 }
7021
7022                 ret = check_cache_range(root, cache, last,
7023                                         key.objectid - last);
7024                 if (ret)
7025                         break;
7026                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7027                         last = key.objectid + key.offset;
7028                 else
7029                         last = key.objectid + root->nodesize;
7030                 path.slots[0]++;
7031         }
7032
7033         if (last < cache->key.objectid + cache->key.offset)
7034                 ret = check_cache_range(root, cache, last,
7035                                         cache->key.objectid +
7036                                         cache->key.offset - last);
7037
7038 out:
7039         btrfs_release_path(&path);
7040
7041         if (!ret &&
7042             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7043                 fprintf(stderr, "There are still entries left in the space "
7044                         "cache\n");
7045                 ret = -EINVAL;
7046         }
7047
7048         return ret;
7049 }
7050
7051 static int check_space_cache(struct btrfs_root *root)
7052 {
7053         struct btrfs_block_group_cache *cache;
7054         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7055         int ret;
7056         int error = 0;
7057
7058         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7059             btrfs_super_generation(root->fs_info->super_copy) !=
7060             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7061                 printf("cache and super generation don't match, space cache "
7062                        "will be invalidated\n");
7063                 return 0;
7064         }
7065
7066         if (ctx.progress_enabled) {
7067                 ctx.tp = TASK_FREE_SPACE;
7068                 task_start(ctx.info);
7069         }
7070
7071         while (1) {
7072                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7073                 if (!cache)
7074                         break;
7075
7076                 start = cache->key.objectid + cache->key.offset;
7077                 if (!cache->free_space_ctl) {
7078                         if (btrfs_init_free_space_ctl(cache,
7079                                                       root->sectorsize)) {
7080                                 ret = -ENOMEM;
7081                                 break;
7082                         }
7083                 } else {
7084                         btrfs_remove_free_space_cache(cache);
7085                 }
7086
7087                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7088                         ret = exclude_super_stripes(root, cache);
7089                         if (ret) {
7090                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7091                                         strerror(-ret));
7092                                 error++;
7093                                 continue;
7094                         }
7095                         ret = load_free_space_tree(root->fs_info, cache);
7096                         free_excluded_extents(root, cache);
7097                         if (ret < 0) {
7098                                 fprintf(stderr, "could not load free space tree: %s\n",
7099                                         strerror(-ret));
7100                                 error++;
7101                                 continue;
7102                         }
7103                         error += ret;
7104                 } else {
7105                         ret = load_free_space_cache(root->fs_info, cache);
7106                         if (!ret)
7107                                 continue;
7108                 }
7109
7110                 ret = verify_space_cache(root, cache);
7111                 if (ret) {
7112                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7113                                 cache->key.objectid);
7114                         error++;
7115                 }
7116         }
7117
7118         task_stop(ctx.info);
7119
7120         return error ? -EINVAL : 0;
7121 }
7122
7123 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7124                         u64 num_bytes, unsigned long leaf_offset,
7125                         struct extent_buffer *eb) {
7126
7127         u64 offset = 0;
7128         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7129         char *data;
7130         unsigned long csum_offset;
7131         u32 csum;
7132         u32 csum_expected;
7133         u64 read_len;
7134         u64 data_checked = 0;
7135         u64 tmp;
7136         int ret = 0;
7137         int mirror;
7138         int num_copies;
7139
7140         if (num_bytes % root->sectorsize)
7141                 return -EINVAL;
7142
7143         data = malloc(num_bytes);
7144         if (!data)
7145                 return -ENOMEM;
7146
7147         while (offset < num_bytes) {
7148                 mirror = 0;
7149 again:
7150                 read_len = num_bytes - offset;
7151                 /* read as much space once a time */
7152                 ret = read_extent_data(root, data + offset,
7153                                 bytenr + offset, &read_len, mirror);
7154                 if (ret)
7155                         goto out;
7156                 data_checked = 0;
7157                 /* verify every 4k data's checksum */
7158                 while (data_checked < read_len) {
7159                         csum = ~(u32)0;
7160                         tmp = offset + data_checked;
7161
7162                         csum = btrfs_csum_data((char *)data + tmp,
7163                                                csum, root->sectorsize);
7164                         btrfs_csum_final(csum, (u8 *)&csum);
7165
7166                         csum_offset = leaf_offset +
7167                                  tmp / root->sectorsize * csum_size;
7168                         read_extent_buffer(eb, (char *)&csum_expected,
7169                                            csum_offset, csum_size);
7170                         /* try another mirror */
7171                         if (csum != csum_expected) {
7172                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7173                                                 mirror, bytenr + tmp,
7174                                                 csum, csum_expected);
7175                                 num_copies = btrfs_num_copies(
7176                                                 &root->fs_info->mapping_tree,
7177                                                 bytenr, num_bytes);
7178                                 if (mirror < num_copies - 1) {
7179                                         mirror += 1;
7180                                         goto again;
7181                                 }
7182                         }
7183                         data_checked += root->sectorsize;
7184                 }
7185                 offset += read_len;
7186         }
7187 out:
7188         free(data);
7189         return ret;
7190 }
7191
7192 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7193                                u64 num_bytes)
7194 {
7195         struct btrfs_path path;
7196         struct extent_buffer *leaf;
7197         struct btrfs_key key;
7198         int ret;
7199
7200         btrfs_init_path(&path);
7201         key.objectid = bytenr;
7202         key.type = BTRFS_EXTENT_ITEM_KEY;
7203         key.offset = (u64)-1;
7204
7205 again:
7206         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7207                                 0, 0);
7208         if (ret < 0) {
7209                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7210                 btrfs_release_path(&path);
7211                 return ret;
7212         } else if (ret) {
7213                 if (path.slots[0] > 0) {
7214                         path.slots[0]--;
7215                 } else {
7216                         ret = btrfs_prev_leaf(root, &path);
7217                         if (ret < 0) {
7218                                 goto out;
7219                         } else if (ret > 0) {
7220                                 ret = 0;
7221                                 goto out;
7222                         }
7223                 }
7224         }
7225
7226         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7227
7228         /*
7229          * Block group items come before extent items if they have the same
7230          * bytenr, so walk back one more just in case.  Dear future traveller,
7231          * first congrats on mastering time travel.  Now if it's not too much
7232          * trouble could you go back to 2006 and tell Chris to make the
7233          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7234          * EXTENT_ITEM_KEY please?
7235          */
7236         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7237                 if (path.slots[0] > 0) {
7238                         path.slots[0]--;
7239                 } else {
7240                         ret = btrfs_prev_leaf(root, &path);
7241                         if (ret < 0) {
7242                                 goto out;
7243                         } else if (ret > 0) {
7244                                 ret = 0;
7245                                 goto out;
7246                         }
7247                 }
7248                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7249         }
7250
7251         while (num_bytes) {
7252                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7253                         ret = btrfs_next_leaf(root, &path);
7254                         if (ret < 0) {
7255                                 fprintf(stderr, "Error going to next leaf "
7256                                         "%d\n", ret);
7257                                 btrfs_release_path(&path);
7258                                 return ret;
7259                         } else if (ret) {
7260                                 break;
7261                         }
7262                 }
7263                 leaf = path.nodes[0];
7264                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7265                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7266                         path.slots[0]++;
7267                         continue;
7268                 }
7269                 if (key.objectid + key.offset < bytenr) {
7270                         path.slots[0]++;
7271                         continue;
7272                 }
7273                 if (key.objectid > bytenr + num_bytes)
7274                         break;
7275
7276                 if (key.objectid == bytenr) {
7277                         if (key.offset >= num_bytes) {
7278                                 num_bytes = 0;
7279                                 break;
7280                         }
7281                         num_bytes -= key.offset;
7282                         bytenr += key.offset;
7283                 } else if (key.objectid < bytenr) {
7284                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7285                                 num_bytes = 0;
7286                                 break;
7287                         }
7288                         num_bytes = (bytenr + num_bytes) -
7289                                 (key.objectid + key.offset);
7290                         bytenr = key.objectid + key.offset;
7291                 } else {
7292                         if (key.objectid + key.offset < bytenr + num_bytes) {
7293                                 u64 new_start = key.objectid + key.offset;
7294                                 u64 new_bytes = bytenr + num_bytes - new_start;
7295
7296                                 /*
7297                                  * Weird case, the extent is in the middle of
7298                                  * our range, we'll have to search one side
7299                                  * and then the other.  Not sure if this happens
7300                                  * in real life, but no harm in coding it up
7301                                  * anyway just in case.
7302                                  */
7303                                 btrfs_release_path(&path);
7304                                 ret = check_extent_exists(root, new_start,
7305                                                           new_bytes);
7306                                 if (ret) {
7307                                         fprintf(stderr, "Right section didn't "
7308                                                 "have a record\n");
7309                                         break;
7310                                 }
7311                                 num_bytes = key.objectid - bytenr;
7312                                 goto again;
7313                         }
7314                         num_bytes = key.objectid - bytenr;
7315                 }
7316                 path.slots[0]++;
7317         }
7318         ret = 0;
7319
7320 out:
7321         if (num_bytes && !ret) {
7322                 fprintf(stderr, "There are no extents for csum range "
7323                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7324                 ret = 1;
7325         }
7326
7327         btrfs_release_path(&path);
7328         return ret;
7329 }
7330
7331 static int check_csums(struct btrfs_root *root)
7332 {
7333         struct btrfs_path path;
7334         struct extent_buffer *leaf;
7335         struct btrfs_key key;
7336         u64 offset = 0, num_bytes = 0;
7337         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7338         int errors = 0;
7339         int ret;
7340         u64 data_len;
7341         unsigned long leaf_offset;
7342
7343         root = root->fs_info->csum_root;
7344         if (!extent_buffer_uptodate(root->node)) {
7345                 fprintf(stderr, "No valid csum tree found\n");
7346                 return -ENOENT;
7347         }
7348
7349         btrfs_init_path(&path);
7350         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7351         key.type = BTRFS_EXTENT_CSUM_KEY;
7352         key.offset = 0;
7353         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7354         if (ret < 0) {
7355                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7356                 btrfs_release_path(&path);
7357                 return ret;
7358         }
7359
7360         if (ret > 0 && path.slots[0])
7361                 path.slots[0]--;
7362         ret = 0;
7363
7364         while (1) {
7365                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7366                         ret = btrfs_next_leaf(root, &path);
7367                         if (ret < 0) {
7368                                 fprintf(stderr, "Error going to next leaf "
7369                                         "%d\n", ret);
7370                                 break;
7371                         }
7372                         if (ret)
7373                                 break;
7374                 }
7375                 leaf = path.nodes[0];
7376
7377                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7378                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7379                         path.slots[0]++;
7380                         continue;
7381                 }
7382
7383                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7384                               csum_size) * root->sectorsize;
7385                 if (!check_data_csum)
7386                         goto skip_csum_check;
7387                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7388                 ret = check_extent_csums(root, key.offset, data_len,
7389                                          leaf_offset, leaf);
7390                 if (ret)
7391                         break;
7392 skip_csum_check:
7393                 if (!num_bytes) {
7394                         offset = key.offset;
7395                 } else if (key.offset != offset + num_bytes) {
7396                         ret = check_extent_exists(root, offset, num_bytes);
7397                         if (ret) {
7398                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7399                                         "there is no extent record\n",
7400                                         offset, offset+num_bytes);
7401                                 errors++;
7402                         }
7403                         offset = key.offset;
7404                         num_bytes = 0;
7405                 }
7406                 num_bytes += data_len;
7407                 path.slots[0]++;
7408         }
7409
7410         btrfs_release_path(&path);
7411         return errors;
7412 }
7413
7414 static int is_dropped_key(struct btrfs_key *key,
7415                           struct btrfs_key *drop_key) {
7416         if (key->objectid < drop_key->objectid)
7417                 return 1;
7418         else if (key->objectid == drop_key->objectid) {
7419                 if (key->type < drop_key->type)
7420                         return 1;
7421                 else if (key->type == drop_key->type) {
7422                         if (key->offset < drop_key->offset)
7423                                 return 1;
7424                 }
7425         }
7426         return 0;
7427 }
7428
7429 /*
7430  * Here are the rules for FULL_BACKREF.
7431  *
7432  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7433  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7434  *      FULL_BACKREF set.
7435  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7436  *    if it happened after the relocation occurred since we'll have dropped the
7437  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7438  *    have no real way to know for sure.
7439  *
7440  * We process the blocks one root at a time, and we start from the lowest root
7441  * objectid and go to the highest.  So we can just lookup the owner backref for
7442  * the record and if we don't find it then we know it doesn't exist and we have
7443  * a FULL BACKREF.
7444  *
7445  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7446  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7447  * be set or not and then we can check later once we've gathered all the refs.
7448  */
7449 static int calc_extent_flag(struct cache_tree *extent_cache,
7450                            struct extent_buffer *buf,
7451                            struct root_item_record *ri,
7452                            u64 *flags)
7453 {
7454         struct extent_record *rec;
7455         struct cache_extent *cache;
7456         struct tree_backref *tback;
7457         u64 owner = 0;
7458
7459         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7460         /* we have added this extent before */
7461         if (!cache)
7462                 return -ENOENT;
7463
7464         rec = container_of(cache, struct extent_record, cache);
7465
7466         /*
7467          * Except file/reloc tree, we can not have
7468          * FULL BACKREF MODE
7469          */
7470         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7471                 goto normal;
7472         /*
7473          * root node
7474          */
7475         if (buf->start == ri->bytenr)
7476                 goto normal;
7477
7478         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7479                 goto full_backref;
7480
7481         owner = btrfs_header_owner(buf);
7482         if (owner == ri->objectid)
7483                 goto normal;
7484
7485         tback = find_tree_backref(rec, 0, owner);
7486         if (!tback)
7487                 goto full_backref;
7488 normal:
7489         *flags = 0;
7490         if (rec->flag_block_full_backref != FLAG_UNSET &&
7491             rec->flag_block_full_backref != 0)
7492                 rec->bad_full_backref = 1;
7493         return 0;
7494 full_backref:
7495         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7496         if (rec->flag_block_full_backref != FLAG_UNSET &&
7497             rec->flag_block_full_backref != 1)
7498                 rec->bad_full_backref = 1;
7499         return 0;
7500 }
7501
7502 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7503 {
7504         fprintf(stderr, "Invalid key type(");
7505         print_key_type(stderr, 0, key_type);
7506         fprintf(stderr, ") found in root(");
7507         print_objectid(stderr, rootid, 0);
7508         fprintf(stderr, ")\n");
7509 }
7510
7511 /*
7512  * Check if the key is valid with its extent buffer.
7513  *
7514  * This is a early check in case invalid key exists in a extent buffer
7515  * This is not comprehensive yet, but should prevent wrong key/item passed
7516  * further
7517  */
7518 static int check_type_with_root(u64 rootid, u8 key_type)
7519 {
7520         switch (key_type) {
7521         /* Only valid in chunk tree */
7522         case BTRFS_DEV_ITEM_KEY:
7523         case BTRFS_CHUNK_ITEM_KEY:
7524                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7525                         goto err;
7526                 break;
7527         /* valid in csum and log tree */
7528         case BTRFS_CSUM_TREE_OBJECTID:
7529                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7530                       is_fstree(rootid)))
7531                         goto err;
7532                 break;
7533         case BTRFS_EXTENT_ITEM_KEY:
7534         case BTRFS_METADATA_ITEM_KEY:
7535         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7536                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7537                         goto err;
7538                 break;
7539         case BTRFS_ROOT_ITEM_KEY:
7540                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7541                         goto err;
7542                 break;
7543         case BTRFS_DEV_EXTENT_KEY:
7544                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7545                         goto err;
7546                 break;
7547         }
7548         return 0;
7549 err:
7550         report_mismatch_key_root(key_type, rootid);
7551         return -EINVAL;
7552 }
7553
7554 static int run_next_block(struct btrfs_root *root,
7555                           struct block_info *bits,
7556                           int bits_nr,
7557                           u64 *last,
7558                           struct cache_tree *pending,
7559                           struct cache_tree *seen,
7560                           struct cache_tree *reada,
7561                           struct cache_tree *nodes,
7562                           struct cache_tree *extent_cache,
7563                           struct cache_tree *chunk_cache,
7564                           struct rb_root *dev_cache,
7565                           struct block_group_tree *block_group_cache,
7566                           struct device_extent_tree *dev_extent_cache,
7567                           struct root_item_record *ri)
7568 {
7569         struct extent_buffer *buf;
7570         struct extent_record *rec = NULL;
7571         u64 bytenr;
7572         u32 size;
7573         u64 parent;
7574         u64 owner;
7575         u64 flags;
7576         u64 ptr;
7577         u64 gen = 0;
7578         int ret = 0;
7579         int i;
7580         int nritems;
7581         struct btrfs_key key;
7582         struct cache_extent *cache;
7583         int reada_bits;
7584
7585         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7586                                     bits_nr, &reada_bits);
7587         if (nritems == 0)
7588                 return 1;
7589
7590         if (!reada_bits) {
7591                 for(i = 0; i < nritems; i++) {
7592                         ret = add_cache_extent(reada, bits[i].start,
7593                                                bits[i].size);
7594                         if (ret == -EEXIST)
7595                                 continue;
7596
7597                         /* fixme, get the parent transid */
7598                         readahead_tree_block(root, bits[i].start,
7599                                              bits[i].size, 0);
7600                 }
7601         }
7602         *last = bits[0].start;
7603         bytenr = bits[0].start;
7604         size = bits[0].size;
7605
7606         cache = lookup_cache_extent(pending, bytenr, size);
7607         if (cache) {
7608                 remove_cache_extent(pending, cache);
7609                 free(cache);
7610         }
7611         cache = lookup_cache_extent(reada, bytenr, size);
7612         if (cache) {
7613                 remove_cache_extent(reada, cache);
7614                 free(cache);
7615         }
7616         cache = lookup_cache_extent(nodes, bytenr, size);
7617         if (cache) {
7618                 remove_cache_extent(nodes, cache);
7619                 free(cache);
7620         }
7621         cache = lookup_cache_extent(extent_cache, bytenr, size);
7622         if (cache) {
7623                 rec = container_of(cache, struct extent_record, cache);
7624                 gen = rec->parent_generation;
7625         }
7626
7627         /* fixme, get the real parent transid */
7628         buf = read_tree_block(root, bytenr, size, gen);
7629         if (!extent_buffer_uptodate(buf)) {
7630                 record_bad_block_io(root->fs_info,
7631                                     extent_cache, bytenr, size);
7632                 goto out;
7633         }
7634
7635         nritems = btrfs_header_nritems(buf);
7636
7637         flags = 0;
7638         if (!init_extent_tree) {
7639                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7640                                        btrfs_header_level(buf), 1, NULL,
7641                                        &flags);
7642                 if (ret < 0) {
7643                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7644                         if (ret < 0) {
7645                                 fprintf(stderr, "Couldn't calc extent flags\n");
7646                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7647                         }
7648                 }
7649         } else {
7650                 flags = 0;
7651                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7652                 if (ret < 0) {
7653                         fprintf(stderr, "Couldn't calc extent flags\n");
7654                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7655                 }
7656         }
7657
7658         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7659                 if (ri != NULL &&
7660                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7661                     ri->objectid == btrfs_header_owner(buf)) {
7662                         /*
7663                          * Ok we got to this block from it's original owner and
7664                          * we have FULL_BACKREF set.  Relocation can leave
7665                          * converted blocks over so this is altogether possible,
7666                          * however it's not possible if the generation > the
7667                          * last snapshot, so check for this case.
7668                          */
7669                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7670                             btrfs_header_generation(buf) > ri->last_snapshot) {
7671                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7672                                 rec->bad_full_backref = 1;
7673                         }
7674                 }
7675         } else {
7676                 if (ri != NULL &&
7677                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7678                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7679                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7680                         rec->bad_full_backref = 1;
7681                 }
7682         }
7683
7684         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7685                 rec->flag_block_full_backref = 1;
7686                 parent = bytenr;
7687                 owner = 0;
7688         } else {
7689                 rec->flag_block_full_backref = 0;
7690                 parent = 0;
7691                 owner = btrfs_header_owner(buf);
7692         }
7693
7694         ret = check_block(root, extent_cache, buf, flags);
7695         if (ret)
7696                 goto out;
7697
7698         if (btrfs_is_leaf(buf)) {
7699                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7700                 for (i = 0; i < nritems; i++) {
7701                         struct btrfs_file_extent_item *fi;
7702                         btrfs_item_key_to_cpu(buf, &key, i);
7703                         /*
7704                          * Check key type against the leaf owner.
7705                          * Could filter quite a lot of early error if
7706                          * owner is correct
7707                          */
7708                         if (check_type_with_root(btrfs_header_owner(buf),
7709                                                  key.type)) {
7710                                 fprintf(stderr, "ignoring invalid key\n");
7711                                 continue;
7712                         }
7713                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7714                                 process_extent_item(root, extent_cache, buf,
7715                                                     i);
7716                                 continue;
7717                         }
7718                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7719                                 process_extent_item(root, extent_cache, buf,
7720                                                     i);
7721                                 continue;
7722                         }
7723                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7724                                 total_csum_bytes +=
7725                                         btrfs_item_size_nr(buf, i);
7726                                 continue;
7727                         }
7728                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7729                                 process_chunk_item(chunk_cache, &key, buf, i);
7730                                 continue;
7731                         }
7732                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7733                                 process_device_item(dev_cache, &key, buf, i);
7734                                 continue;
7735                         }
7736                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7737                                 process_block_group_item(block_group_cache,
7738                                         &key, buf, i);
7739                                 continue;
7740                         }
7741                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7742                                 process_device_extent_item(dev_extent_cache,
7743                                         &key, buf, i);
7744                                 continue;
7745
7746                         }
7747                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7748 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7749                                 process_extent_ref_v0(extent_cache, buf, i);
7750 #else
7751                                 BUG();
7752 #endif
7753                                 continue;
7754                         }
7755
7756                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7757                                 ret = add_tree_backref(extent_cache,
7758                                                 key.objectid, 0, key.offset, 0);
7759                                 if (ret < 0)
7760                                         error(
7761                                 "add_tree_backref failed (leaf tree block): %s",
7762                                               strerror(-ret));
7763                                 continue;
7764                         }
7765                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7766                                 ret = add_tree_backref(extent_cache,
7767                                                 key.objectid, key.offset, 0, 0);
7768                                 if (ret < 0)
7769                                         error(
7770                                 "add_tree_backref failed (leaf shared block): %s",
7771                                               strerror(-ret));
7772                                 continue;
7773                         }
7774                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7775                                 struct btrfs_extent_data_ref *ref;
7776                                 ref = btrfs_item_ptr(buf, i,
7777                                                 struct btrfs_extent_data_ref);
7778                                 add_data_backref(extent_cache,
7779                                         key.objectid, 0,
7780                                         btrfs_extent_data_ref_root(buf, ref),
7781                                         btrfs_extent_data_ref_objectid(buf,
7782                                                                        ref),
7783                                         btrfs_extent_data_ref_offset(buf, ref),
7784                                         btrfs_extent_data_ref_count(buf, ref),
7785                                         0, root->sectorsize);
7786                                 continue;
7787                         }
7788                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7789                                 struct btrfs_shared_data_ref *ref;
7790                                 ref = btrfs_item_ptr(buf, i,
7791                                                 struct btrfs_shared_data_ref);
7792                                 add_data_backref(extent_cache,
7793                                         key.objectid, key.offset, 0, 0, 0,
7794                                         btrfs_shared_data_ref_count(buf, ref),
7795                                         0, root->sectorsize);
7796                                 continue;
7797                         }
7798                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7799                                 struct bad_item *bad;
7800
7801                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7802                                         continue;
7803                                 if (!owner)
7804                                         continue;
7805                                 bad = malloc(sizeof(struct bad_item));
7806                                 if (!bad)
7807                                         continue;
7808                                 INIT_LIST_HEAD(&bad->list);
7809                                 memcpy(&bad->key, &key,
7810                                        sizeof(struct btrfs_key));
7811                                 bad->root_id = owner;
7812                                 list_add_tail(&bad->list, &delete_items);
7813                                 continue;
7814                         }
7815                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7816                                 continue;
7817                         fi = btrfs_item_ptr(buf, i,
7818                                             struct btrfs_file_extent_item);
7819                         if (btrfs_file_extent_type(buf, fi) ==
7820                             BTRFS_FILE_EXTENT_INLINE)
7821                                 continue;
7822                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7823                                 continue;
7824
7825                         data_bytes_allocated +=
7826                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7827                         if (data_bytes_allocated < root->sectorsize) {
7828                                 abort();
7829                         }
7830                         data_bytes_referenced +=
7831                                 btrfs_file_extent_num_bytes(buf, fi);
7832                         add_data_backref(extent_cache,
7833                                 btrfs_file_extent_disk_bytenr(buf, fi),
7834                                 parent, owner, key.objectid, key.offset -
7835                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7836                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7837                 }
7838         } else {
7839                 int level;
7840                 struct btrfs_key first_key;
7841
7842                 first_key.objectid = 0;
7843
7844                 if (nritems > 0)
7845                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7846                 level = btrfs_header_level(buf);
7847                 for (i = 0; i < nritems; i++) {
7848                         struct extent_record tmpl;
7849
7850                         ptr = btrfs_node_blockptr(buf, i);
7851                         size = root->nodesize;
7852                         btrfs_node_key_to_cpu(buf, &key, i);
7853                         if (ri != NULL) {
7854                                 if ((level == ri->drop_level)
7855                                     && is_dropped_key(&key, &ri->drop_key)) {
7856                                         continue;
7857                                 }
7858                         }
7859
7860                         memset(&tmpl, 0, sizeof(tmpl));
7861                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7862                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7863                         tmpl.start = ptr;
7864                         tmpl.nr = size;
7865                         tmpl.refs = 1;
7866                         tmpl.metadata = 1;
7867                         tmpl.max_size = size;
7868                         ret = add_extent_rec(extent_cache, &tmpl);
7869                         if (ret < 0)
7870                                 goto out;
7871
7872                         ret = add_tree_backref(extent_cache, ptr, parent,
7873                                         owner, 1);
7874                         if (ret < 0) {
7875                                 error(
7876                                 "add_tree_backref failed (non-leaf block): %s",
7877                                       strerror(-ret));
7878                                 continue;
7879                         }
7880
7881                         if (level > 1) {
7882                                 add_pending(nodes, seen, ptr, size);
7883                         } else {
7884                                 add_pending(pending, seen, ptr, size);
7885                         }
7886                 }
7887                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7888                                       nritems) * sizeof(struct btrfs_key_ptr);
7889         }
7890         total_btree_bytes += buf->len;
7891         if (fs_root_objectid(btrfs_header_owner(buf)))
7892                 total_fs_tree_bytes += buf->len;
7893         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7894                 total_extent_tree_bytes += buf->len;
7895         if (!found_old_backref &&
7896             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7897             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7898             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7899                 found_old_backref = 1;
7900 out:
7901         free_extent_buffer(buf);
7902         return ret;
7903 }
7904
7905 static int add_root_to_pending(struct extent_buffer *buf,
7906                                struct cache_tree *extent_cache,
7907                                struct cache_tree *pending,
7908                                struct cache_tree *seen,
7909                                struct cache_tree *nodes,
7910                                u64 objectid)
7911 {
7912         struct extent_record tmpl;
7913         int ret;
7914
7915         if (btrfs_header_level(buf) > 0)
7916                 add_pending(nodes, seen, buf->start, buf->len);
7917         else
7918                 add_pending(pending, seen, buf->start, buf->len);
7919
7920         memset(&tmpl, 0, sizeof(tmpl));
7921         tmpl.start = buf->start;
7922         tmpl.nr = buf->len;
7923         tmpl.is_root = 1;
7924         tmpl.refs = 1;
7925         tmpl.metadata = 1;
7926         tmpl.max_size = buf->len;
7927         add_extent_rec(extent_cache, &tmpl);
7928
7929         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7930             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7931                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7932                                 0, 1);
7933         else
7934                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7935                                 1);
7936         return ret;
7937 }
7938
7939 /* as we fix the tree, we might be deleting blocks that
7940  * we're tracking for repair.  This hook makes sure we
7941  * remove any backrefs for blocks as we are fixing them.
7942  */
7943 static int free_extent_hook(struct btrfs_trans_handle *trans,
7944                             struct btrfs_root *root,
7945                             u64 bytenr, u64 num_bytes, u64 parent,
7946                             u64 root_objectid, u64 owner, u64 offset,
7947                             int refs_to_drop)
7948 {
7949         struct extent_record *rec;
7950         struct cache_extent *cache;
7951         int is_data;
7952         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7953
7954         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7955         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7956         if (!cache)
7957                 return 0;
7958
7959         rec = container_of(cache, struct extent_record, cache);
7960         if (is_data) {
7961                 struct data_backref *back;
7962                 back = find_data_backref(rec, parent, root_objectid, owner,
7963                                          offset, 1, bytenr, num_bytes);
7964                 if (!back)
7965                         goto out;
7966                 if (back->node.found_ref) {
7967                         back->found_ref -= refs_to_drop;
7968                         if (rec->refs)
7969                                 rec->refs -= refs_to_drop;
7970                 }
7971                 if (back->node.found_extent_tree) {
7972                         back->num_refs -= refs_to_drop;
7973                         if (rec->extent_item_refs)
7974                                 rec->extent_item_refs -= refs_to_drop;
7975                 }
7976                 if (back->found_ref == 0)
7977                         back->node.found_ref = 0;
7978                 if (back->num_refs == 0)
7979                         back->node.found_extent_tree = 0;
7980
7981                 if (!back->node.found_extent_tree && back->node.found_ref) {
7982                         list_del(&back->node.list);
7983                         free(back);
7984                 }
7985         } else {
7986                 struct tree_backref *back;
7987                 back = find_tree_backref(rec, parent, root_objectid);
7988                 if (!back)
7989                         goto out;
7990                 if (back->node.found_ref) {
7991                         if (rec->refs)
7992                                 rec->refs--;
7993                         back->node.found_ref = 0;
7994                 }
7995                 if (back->node.found_extent_tree) {
7996                         if (rec->extent_item_refs)
7997                                 rec->extent_item_refs--;
7998                         back->node.found_extent_tree = 0;
7999                 }
8000                 if (!back->node.found_extent_tree && back->node.found_ref) {
8001                         list_del(&back->node.list);
8002                         free(back);
8003                 }
8004         }
8005         maybe_free_extent_rec(extent_cache, rec);
8006 out:
8007         return 0;
8008 }
8009
8010 static int delete_extent_records(struct btrfs_trans_handle *trans,
8011                                  struct btrfs_root *root,
8012                                  struct btrfs_path *path,
8013                                  u64 bytenr)
8014 {
8015         struct btrfs_key key;
8016         struct btrfs_key found_key;
8017         struct extent_buffer *leaf;
8018         int ret;
8019         int slot;
8020
8021
8022         key.objectid = bytenr;
8023         key.type = (u8)-1;
8024         key.offset = (u64)-1;
8025
8026         while(1) {
8027                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8028                                         &key, path, 0, 1);
8029                 if (ret < 0)
8030                         break;
8031
8032                 if (ret > 0) {
8033                         ret = 0;
8034                         if (path->slots[0] == 0)
8035                                 break;
8036                         path->slots[0]--;
8037                 }
8038                 ret = 0;
8039
8040                 leaf = path->nodes[0];
8041                 slot = path->slots[0];
8042
8043                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8044                 if (found_key.objectid != bytenr)
8045                         break;
8046
8047                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8048                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8049                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8050                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8051                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8052                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8053                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8054                         btrfs_release_path(path);
8055                         if (found_key.type == 0) {
8056                                 if (found_key.offset == 0)
8057                                         break;
8058                                 key.offset = found_key.offset - 1;
8059                                 key.type = found_key.type;
8060                         }
8061                         key.type = found_key.type - 1;
8062                         key.offset = (u64)-1;
8063                         continue;
8064                 }
8065
8066                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8067                         found_key.objectid, found_key.type, found_key.offset);
8068
8069                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8070                 if (ret)
8071                         break;
8072                 btrfs_release_path(path);
8073
8074                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8075                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8076                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8077                                 found_key.offset : root->nodesize;
8078
8079                         ret = btrfs_update_block_group(trans, root, bytenr,
8080                                                        bytes, 0, 0);
8081                         if (ret)
8082                                 break;
8083                 }
8084         }
8085
8086         btrfs_release_path(path);
8087         return ret;
8088 }
8089
8090 /*
8091  * for a single backref, this will allocate a new extent
8092  * and add the backref to it.
8093  */
8094 static int record_extent(struct btrfs_trans_handle *trans,
8095                          struct btrfs_fs_info *info,
8096                          struct btrfs_path *path,
8097                          struct extent_record *rec,
8098                          struct extent_backref *back,
8099                          int allocated, u64 flags)
8100 {
8101         int ret = 0;
8102         struct btrfs_root *extent_root = info->extent_root;
8103         struct extent_buffer *leaf;
8104         struct btrfs_key ins_key;
8105         struct btrfs_extent_item *ei;
8106         struct data_backref *dback;
8107         struct btrfs_tree_block_info *bi;
8108
8109         if (!back->is_data)
8110                 rec->max_size = max_t(u64, rec->max_size,
8111                                     info->extent_root->nodesize);
8112
8113         if (!allocated) {
8114                 u32 item_size = sizeof(*ei);
8115
8116                 if (!back->is_data)
8117                         item_size += sizeof(*bi);
8118
8119                 ins_key.objectid = rec->start;
8120                 ins_key.offset = rec->max_size;
8121                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8122
8123                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8124                                         &ins_key, item_size);
8125                 if (ret)
8126                         goto fail;
8127
8128                 leaf = path->nodes[0];
8129                 ei = btrfs_item_ptr(leaf, path->slots[0],
8130                                     struct btrfs_extent_item);
8131
8132                 btrfs_set_extent_refs(leaf, ei, 0);
8133                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8134
8135                 if (back->is_data) {
8136                         btrfs_set_extent_flags(leaf, ei,
8137                                                BTRFS_EXTENT_FLAG_DATA);
8138                 } else {
8139                         struct btrfs_disk_key copy_key;;
8140
8141                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8142                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8143                                              sizeof(*bi));
8144
8145                         btrfs_set_disk_key_objectid(&copy_key,
8146                                                     rec->info_objectid);
8147                         btrfs_set_disk_key_type(&copy_key, 0);
8148                         btrfs_set_disk_key_offset(&copy_key, 0);
8149
8150                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8151                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8152
8153                         btrfs_set_extent_flags(leaf, ei,
8154                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8155                 }
8156
8157                 btrfs_mark_buffer_dirty(leaf);
8158                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8159                                                rec->max_size, 1, 0);
8160                 if (ret)
8161                         goto fail;
8162                 btrfs_release_path(path);
8163         }
8164
8165         if (back->is_data) {
8166                 u64 parent;
8167                 int i;
8168
8169                 dback = to_data_backref(back);
8170                 if (back->full_backref)
8171                         parent = dback->parent;
8172                 else
8173                         parent = 0;
8174
8175                 for (i = 0; i < dback->found_ref; i++) {
8176                         /* if parent != 0, we're doing a full backref
8177                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8178                          * just makes the backref allocator create a data
8179                          * backref
8180                          */
8181                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8182                                                    rec->start, rec->max_size,
8183                                                    parent,
8184                                                    dback->root,
8185                                                    parent ?
8186                                                    BTRFS_FIRST_FREE_OBJECTID :
8187                                                    dback->owner,
8188                                                    dback->offset);
8189                         if (ret)
8190                                 break;
8191                 }
8192                 fprintf(stderr, "adding new data backref"
8193                                 " on %llu %s %llu owner %llu"
8194                                 " offset %llu found %d\n",
8195                                 (unsigned long long)rec->start,
8196                                 back->full_backref ?
8197                                 "parent" : "root",
8198                                 back->full_backref ?
8199                                 (unsigned long long)parent :
8200                                 (unsigned long long)dback->root,
8201                                 (unsigned long long)dback->owner,
8202                                 (unsigned long long)dback->offset,
8203                                 dback->found_ref);
8204         } else {
8205                 u64 parent;
8206                 struct tree_backref *tback;
8207
8208                 tback = to_tree_backref(back);
8209                 if (back->full_backref)
8210                         parent = tback->parent;
8211                 else
8212                         parent = 0;
8213
8214                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8215                                            rec->start, rec->max_size,
8216                                            parent, tback->root, 0, 0);
8217                 fprintf(stderr, "adding new tree backref on "
8218                         "start %llu len %llu parent %llu root %llu\n",
8219                         rec->start, rec->max_size, parent, tback->root);
8220         }
8221 fail:
8222         btrfs_release_path(path);
8223         return ret;
8224 }
8225
8226 static struct extent_entry *find_entry(struct list_head *entries,
8227                                        u64 bytenr, u64 bytes)
8228 {
8229         struct extent_entry *entry = NULL;
8230
8231         list_for_each_entry(entry, entries, list) {
8232                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8233                         return entry;
8234         }
8235
8236         return NULL;
8237 }
8238
8239 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8240 {
8241         struct extent_entry *entry, *best = NULL, *prev = NULL;
8242
8243         list_for_each_entry(entry, entries, list) {
8244                 /*
8245                  * If there are as many broken entries as entries then we know
8246                  * not to trust this particular entry.
8247                  */
8248                 if (entry->broken == entry->count)
8249                         continue;
8250
8251                 /*
8252                  * Special case, when there are only two entries and 'best' is
8253                  * the first one
8254                  */
8255                 if (!prev) {
8256                         best = entry;
8257                         prev = entry;
8258                         continue;
8259                 }
8260
8261                 /*
8262                  * If our current entry == best then we can't be sure our best
8263                  * is really the best, so we need to keep searching.
8264                  */
8265                 if (best && best->count == entry->count) {
8266                         prev = entry;
8267                         best = NULL;
8268                         continue;
8269                 }
8270
8271                 /* Prev == entry, not good enough, have to keep searching */
8272                 if (!prev->broken && prev->count == entry->count)
8273                         continue;
8274
8275                 if (!best)
8276                         best = (prev->count > entry->count) ? prev : entry;
8277                 else if (best->count < entry->count)
8278                         best = entry;
8279                 prev = entry;
8280         }
8281
8282         return best;
8283 }
8284
8285 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8286                       struct data_backref *dback, struct extent_entry *entry)
8287 {
8288         struct btrfs_trans_handle *trans;
8289         struct btrfs_root *root;
8290         struct btrfs_file_extent_item *fi;
8291         struct extent_buffer *leaf;
8292         struct btrfs_key key;
8293         u64 bytenr, bytes;
8294         int ret, err;
8295
8296         key.objectid = dback->root;
8297         key.type = BTRFS_ROOT_ITEM_KEY;
8298         key.offset = (u64)-1;
8299         root = btrfs_read_fs_root(info, &key);
8300         if (IS_ERR(root)) {
8301                 fprintf(stderr, "Couldn't find root for our ref\n");
8302                 return -EINVAL;
8303         }
8304
8305         /*
8306          * The backref points to the original offset of the extent if it was
8307          * split, so we need to search down to the offset we have and then walk
8308          * forward until we find the backref we're looking for.
8309          */
8310         key.objectid = dback->owner;
8311         key.type = BTRFS_EXTENT_DATA_KEY;
8312         key.offset = dback->offset;
8313         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8314         if (ret < 0) {
8315                 fprintf(stderr, "Error looking up ref %d\n", ret);
8316                 return ret;
8317         }
8318
8319         while (1) {
8320                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8321                         ret = btrfs_next_leaf(root, path);
8322                         if (ret) {
8323                                 fprintf(stderr, "Couldn't find our ref, next\n");
8324                                 return -EINVAL;
8325                         }
8326                 }
8327                 leaf = path->nodes[0];
8328                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8329                 if (key.objectid != dback->owner ||
8330                     key.type != BTRFS_EXTENT_DATA_KEY) {
8331                         fprintf(stderr, "Couldn't find our ref, search\n");
8332                         return -EINVAL;
8333                 }
8334                 fi = btrfs_item_ptr(leaf, path->slots[0],
8335                                     struct btrfs_file_extent_item);
8336                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8337                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8338
8339                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8340                         break;
8341                 path->slots[0]++;
8342         }
8343
8344         btrfs_release_path(path);
8345
8346         trans = btrfs_start_transaction(root, 1);
8347         if (IS_ERR(trans))
8348                 return PTR_ERR(trans);
8349
8350         /*
8351          * Ok we have the key of the file extent we want to fix, now we can cow
8352          * down to the thing and fix it.
8353          */
8354         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8355         if (ret < 0) {
8356                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8357                         key.objectid, key.type, key.offset, ret);
8358                 goto out;
8359         }
8360         if (ret > 0) {
8361                 fprintf(stderr, "Well that's odd, we just found this key "
8362                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8363                         key.offset);
8364                 ret = -EINVAL;
8365                 goto out;
8366         }
8367         leaf = path->nodes[0];
8368         fi = btrfs_item_ptr(leaf, path->slots[0],
8369                             struct btrfs_file_extent_item);
8370
8371         if (btrfs_file_extent_compression(leaf, fi) &&
8372             dback->disk_bytenr != entry->bytenr) {
8373                 fprintf(stderr, "Ref doesn't match the record start and is "
8374                         "compressed, please take a btrfs-image of this file "
8375                         "system and send it to a btrfs developer so they can "
8376                         "complete this functionality for bytenr %Lu\n",
8377                         dback->disk_bytenr);
8378                 ret = -EINVAL;
8379                 goto out;
8380         }
8381
8382         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8383                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8384         } else if (dback->disk_bytenr > entry->bytenr) {
8385                 u64 off_diff, offset;
8386
8387                 off_diff = dback->disk_bytenr - entry->bytenr;
8388                 offset = btrfs_file_extent_offset(leaf, fi);
8389                 if (dback->disk_bytenr + offset +
8390                     btrfs_file_extent_num_bytes(leaf, fi) >
8391                     entry->bytenr + entry->bytes) {
8392                         fprintf(stderr, "Ref is past the entry end, please "
8393                                 "take a btrfs-image of this file system and "
8394                                 "send it to a btrfs developer, ref %Lu\n",
8395                                 dback->disk_bytenr);
8396                         ret = -EINVAL;
8397                         goto out;
8398                 }
8399                 offset += off_diff;
8400                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8401                 btrfs_set_file_extent_offset(leaf, fi, offset);
8402         } else if (dback->disk_bytenr < entry->bytenr) {
8403                 u64 offset;
8404
8405                 offset = btrfs_file_extent_offset(leaf, fi);
8406                 if (dback->disk_bytenr + offset < entry->bytenr) {
8407                         fprintf(stderr, "Ref is before the entry start, please"
8408                                 " take a btrfs-image of this file system and "
8409                                 "send it to a btrfs developer, ref %Lu\n",
8410                                 dback->disk_bytenr);
8411                         ret = -EINVAL;
8412                         goto out;
8413                 }
8414
8415                 offset += dback->disk_bytenr;
8416                 offset -= entry->bytenr;
8417                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8418                 btrfs_set_file_extent_offset(leaf, fi, offset);
8419         }
8420
8421         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8422
8423         /*
8424          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8425          * only do this if we aren't using compression, otherwise it's a
8426          * trickier case.
8427          */
8428         if (!btrfs_file_extent_compression(leaf, fi))
8429                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8430         else
8431                 printf("ram bytes may be wrong?\n");
8432         btrfs_mark_buffer_dirty(leaf);
8433 out:
8434         err = btrfs_commit_transaction(trans, root);
8435         btrfs_release_path(path);
8436         return ret ? ret : err;
8437 }
8438
8439 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8440                            struct extent_record *rec)
8441 {
8442         struct extent_backref *back;
8443         struct data_backref *dback;
8444         struct extent_entry *entry, *best = NULL;
8445         LIST_HEAD(entries);
8446         int nr_entries = 0;
8447         int broken_entries = 0;
8448         int ret = 0;
8449         short mismatch = 0;
8450
8451         /*
8452          * Metadata is easy and the backrefs should always agree on bytenr and
8453          * size, if not we've got bigger issues.
8454          */
8455         if (rec->metadata)
8456                 return 0;
8457
8458         list_for_each_entry(back, &rec->backrefs, list) {
8459                 if (back->full_backref || !back->is_data)
8460                         continue;
8461
8462                 dback = to_data_backref(back);
8463
8464                 /*
8465                  * We only pay attention to backrefs that we found a real
8466                  * backref for.
8467                  */
8468                 if (dback->found_ref == 0)
8469                         continue;
8470
8471                 /*
8472                  * For now we only catch when the bytes don't match, not the
8473                  * bytenr.  We can easily do this at the same time, but I want
8474                  * to have a fs image to test on before we just add repair
8475                  * functionality willy-nilly so we know we won't screw up the
8476                  * repair.
8477                  */
8478
8479                 entry = find_entry(&entries, dback->disk_bytenr,
8480                                    dback->bytes);
8481                 if (!entry) {
8482                         entry = malloc(sizeof(struct extent_entry));
8483                         if (!entry) {
8484                                 ret = -ENOMEM;
8485                                 goto out;
8486                         }
8487                         memset(entry, 0, sizeof(*entry));
8488                         entry->bytenr = dback->disk_bytenr;
8489                         entry->bytes = dback->bytes;
8490                         list_add_tail(&entry->list, &entries);
8491                         nr_entries++;
8492                 }
8493
8494                 /*
8495                  * If we only have on entry we may think the entries agree when
8496                  * in reality they don't so we have to do some extra checking.
8497                  */
8498                 if (dback->disk_bytenr != rec->start ||
8499                     dback->bytes != rec->nr || back->broken)
8500                         mismatch = 1;
8501
8502                 if (back->broken) {
8503                         entry->broken++;
8504                         broken_entries++;
8505                 }
8506
8507                 entry->count++;
8508         }
8509
8510         /* Yay all the backrefs agree, carry on good sir */
8511         if (nr_entries <= 1 && !mismatch)
8512                 goto out;
8513
8514         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8515                 "%Lu\n", rec->start);
8516
8517         /*
8518          * First we want to see if the backrefs can agree amongst themselves who
8519          * is right, so figure out which one of the entries has the highest
8520          * count.
8521          */
8522         best = find_most_right_entry(&entries);
8523
8524         /*
8525          * Ok so we may have an even split between what the backrefs think, so
8526          * this is where we use the extent ref to see what it thinks.
8527          */
8528         if (!best) {
8529                 entry = find_entry(&entries, rec->start, rec->nr);
8530                 if (!entry && (!broken_entries || !rec->found_rec)) {
8531                         fprintf(stderr, "Backrefs don't agree with each other "
8532                                 "and extent record doesn't agree with anybody,"
8533                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8534                                 rec->start, rec->nr);
8535                         ret = -EINVAL;
8536                         goto out;
8537                 } else if (!entry) {
8538                         /*
8539                          * Ok our backrefs were broken, we'll assume this is the
8540                          * correct value and add an entry for this range.
8541                          */
8542                         entry = malloc(sizeof(struct extent_entry));
8543                         if (!entry) {
8544                                 ret = -ENOMEM;
8545                                 goto out;
8546                         }
8547                         memset(entry, 0, sizeof(*entry));
8548                         entry->bytenr = rec->start;
8549                         entry->bytes = rec->nr;
8550                         list_add_tail(&entry->list, &entries);
8551                         nr_entries++;
8552                 }
8553                 entry->count++;
8554                 best = find_most_right_entry(&entries);
8555                 if (!best) {
8556                         fprintf(stderr, "Backrefs and extent record evenly "
8557                                 "split on who is right, this is going to "
8558                                 "require user input to fix bytenr %Lu bytes "
8559                                 "%Lu\n", rec->start, rec->nr);
8560                         ret = -EINVAL;
8561                         goto out;
8562                 }
8563         }
8564
8565         /*
8566          * I don't think this can happen currently as we'll abort() if we catch
8567          * this case higher up, but in case somebody removes that we still can't
8568          * deal with it properly here yet, so just bail out of that's the case.
8569          */
8570         if (best->bytenr != rec->start) {
8571                 fprintf(stderr, "Extent start and backref starts don't match, "
8572                         "please use btrfs-image on this file system and send "
8573                         "it to a btrfs developer so they can make fsck fix "
8574                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8575                         rec->start, rec->nr);
8576                 ret = -EINVAL;
8577                 goto out;
8578         }
8579
8580         /*
8581          * Ok great we all agreed on an extent record, let's go find the real
8582          * references and fix up the ones that don't match.
8583          */
8584         list_for_each_entry(back, &rec->backrefs, list) {
8585                 if (back->full_backref || !back->is_data)
8586                         continue;
8587
8588                 dback = to_data_backref(back);
8589
8590                 /*
8591                  * Still ignoring backrefs that don't have a real ref attached
8592                  * to them.
8593                  */
8594                 if (dback->found_ref == 0)
8595                         continue;
8596
8597                 if (dback->bytes == best->bytes &&
8598                     dback->disk_bytenr == best->bytenr)
8599                         continue;
8600
8601                 ret = repair_ref(info, path, dback, best);
8602                 if (ret)
8603                         goto out;
8604         }
8605
8606         /*
8607          * Ok we messed with the actual refs, which means we need to drop our
8608          * entire cache and go back and rescan.  I know this is a huge pain and
8609          * adds a lot of extra work, but it's the only way to be safe.  Once all
8610          * the backrefs agree we may not need to do anything to the extent
8611          * record itself.
8612          */
8613         ret = -EAGAIN;
8614 out:
8615         while (!list_empty(&entries)) {
8616                 entry = list_entry(entries.next, struct extent_entry, list);
8617                 list_del_init(&entry->list);
8618                 free(entry);
8619         }
8620         return ret;
8621 }
8622
8623 static int process_duplicates(struct cache_tree *extent_cache,
8624                               struct extent_record *rec)
8625 {
8626         struct extent_record *good, *tmp;
8627         struct cache_extent *cache;
8628         int ret;
8629
8630         /*
8631          * If we found a extent record for this extent then return, or if we
8632          * have more than one duplicate we are likely going to need to delete
8633          * something.
8634          */
8635         if (rec->found_rec || rec->num_duplicates > 1)
8636                 return 0;
8637
8638         /* Shouldn't happen but just in case */
8639         BUG_ON(!rec->num_duplicates);
8640
8641         /*
8642          * So this happens if we end up with a backref that doesn't match the
8643          * actual extent entry.  So either the backref is bad or the extent
8644          * entry is bad.  Either way we want to have the extent_record actually
8645          * reflect what we found in the extent_tree, so we need to take the
8646          * duplicate out and use that as the extent_record since the only way we
8647          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8648          */
8649         remove_cache_extent(extent_cache, &rec->cache);
8650
8651         good = to_extent_record(rec->dups.next);
8652         list_del_init(&good->list);
8653         INIT_LIST_HEAD(&good->backrefs);
8654         INIT_LIST_HEAD(&good->dups);
8655         good->cache.start = good->start;
8656         good->cache.size = good->nr;
8657         good->content_checked = 0;
8658         good->owner_ref_checked = 0;
8659         good->num_duplicates = 0;
8660         good->refs = rec->refs;
8661         list_splice_init(&rec->backrefs, &good->backrefs);
8662         while (1) {
8663                 cache = lookup_cache_extent(extent_cache, good->start,
8664                                             good->nr);
8665                 if (!cache)
8666                         break;
8667                 tmp = container_of(cache, struct extent_record, cache);
8668
8669                 /*
8670                  * If we find another overlapping extent and it's found_rec is
8671                  * set then it's a duplicate and we need to try and delete
8672                  * something.
8673                  */
8674                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8675                         if (list_empty(&good->list))
8676                                 list_add_tail(&good->list,
8677                                               &duplicate_extents);
8678                         good->num_duplicates += tmp->num_duplicates + 1;
8679                         list_splice_init(&tmp->dups, &good->dups);
8680                         list_del_init(&tmp->list);
8681                         list_add_tail(&tmp->list, &good->dups);
8682                         remove_cache_extent(extent_cache, &tmp->cache);
8683                         continue;
8684                 }
8685
8686                 /*
8687                  * Ok we have another non extent item backed extent rec, so lets
8688                  * just add it to this extent and carry on like we did above.
8689                  */
8690                 good->refs += tmp->refs;
8691                 list_splice_init(&tmp->backrefs, &good->backrefs);
8692                 remove_cache_extent(extent_cache, &tmp->cache);
8693                 free(tmp);
8694         }
8695         ret = insert_cache_extent(extent_cache, &good->cache);
8696         BUG_ON(ret);
8697         free(rec);
8698         return good->num_duplicates ? 0 : 1;
8699 }
8700
8701 static int delete_duplicate_records(struct btrfs_root *root,
8702                                     struct extent_record *rec)
8703 {
8704         struct btrfs_trans_handle *trans;
8705         LIST_HEAD(delete_list);
8706         struct btrfs_path path;
8707         struct extent_record *tmp, *good, *n;
8708         int nr_del = 0;
8709         int ret = 0, err;
8710         struct btrfs_key key;
8711
8712         btrfs_init_path(&path);
8713
8714         good = rec;
8715         /* Find the record that covers all of the duplicates. */
8716         list_for_each_entry(tmp, &rec->dups, list) {
8717                 if (good->start < tmp->start)
8718                         continue;
8719                 if (good->nr > tmp->nr)
8720                         continue;
8721
8722                 if (tmp->start + tmp->nr < good->start + good->nr) {
8723                         fprintf(stderr, "Ok we have overlapping extents that "
8724                                 "aren't completely covered by each other, this "
8725                                 "is going to require more careful thought.  "
8726                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8727                                 tmp->start, tmp->nr, good->start, good->nr);
8728                         abort();
8729                 }
8730                 good = tmp;
8731         }
8732
8733         if (good != rec)
8734                 list_add_tail(&rec->list, &delete_list);
8735
8736         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8737                 if (tmp == good)
8738                         continue;
8739                 list_move_tail(&tmp->list, &delete_list);
8740         }
8741
8742         root = root->fs_info->extent_root;
8743         trans = btrfs_start_transaction(root, 1);
8744         if (IS_ERR(trans)) {
8745                 ret = PTR_ERR(trans);
8746                 goto out;
8747         }
8748
8749         list_for_each_entry(tmp, &delete_list, list) {
8750                 if (tmp->found_rec == 0)
8751                         continue;
8752                 key.objectid = tmp->start;
8753                 key.type = BTRFS_EXTENT_ITEM_KEY;
8754                 key.offset = tmp->nr;
8755
8756                 /* Shouldn't happen but just in case */
8757                 if (tmp->metadata) {
8758                         fprintf(stderr, "Well this shouldn't happen, extent "
8759                                 "record overlaps but is metadata? "
8760                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8761                         abort();
8762                 }
8763
8764                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8765                 if (ret) {
8766                         if (ret > 0)
8767                                 ret = -EINVAL;
8768                         break;
8769                 }
8770                 ret = btrfs_del_item(trans, root, &path);
8771                 if (ret)
8772                         break;
8773                 btrfs_release_path(&path);
8774                 nr_del++;
8775         }
8776         err = btrfs_commit_transaction(trans, root);
8777         if (err && !ret)
8778                 ret = err;
8779 out:
8780         while (!list_empty(&delete_list)) {
8781                 tmp = to_extent_record(delete_list.next);
8782                 list_del_init(&tmp->list);
8783                 if (tmp == rec)
8784                         continue;
8785                 free(tmp);
8786         }
8787
8788         while (!list_empty(&rec->dups)) {
8789                 tmp = to_extent_record(rec->dups.next);
8790                 list_del_init(&tmp->list);
8791                 free(tmp);
8792         }
8793
8794         btrfs_release_path(&path);
8795
8796         if (!ret && !nr_del)
8797                 rec->num_duplicates = 0;
8798
8799         return ret ? ret : nr_del;
8800 }
8801
8802 static int find_possible_backrefs(struct btrfs_fs_info *info,
8803                                   struct btrfs_path *path,
8804                                   struct cache_tree *extent_cache,
8805                                   struct extent_record *rec)
8806 {
8807         struct btrfs_root *root;
8808         struct extent_backref *back;
8809         struct data_backref *dback;
8810         struct cache_extent *cache;
8811         struct btrfs_file_extent_item *fi;
8812         struct btrfs_key key;
8813         u64 bytenr, bytes;
8814         int ret;
8815
8816         list_for_each_entry(back, &rec->backrefs, list) {
8817                 /* Don't care about full backrefs (poor unloved backrefs) */
8818                 if (back->full_backref || !back->is_data)
8819                         continue;
8820
8821                 dback = to_data_backref(back);
8822
8823                 /* We found this one, we don't need to do a lookup */
8824                 if (dback->found_ref)
8825                         continue;
8826
8827                 key.objectid = dback->root;
8828                 key.type = BTRFS_ROOT_ITEM_KEY;
8829                 key.offset = (u64)-1;
8830
8831                 root = btrfs_read_fs_root(info, &key);
8832
8833                 /* No root, definitely a bad ref, skip */
8834                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8835                         continue;
8836                 /* Other err, exit */
8837                 if (IS_ERR(root))
8838                         return PTR_ERR(root);
8839
8840                 key.objectid = dback->owner;
8841                 key.type = BTRFS_EXTENT_DATA_KEY;
8842                 key.offset = dback->offset;
8843                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8844                 if (ret) {
8845                         btrfs_release_path(path);
8846                         if (ret < 0)
8847                                 return ret;
8848                         /* Didn't find it, we can carry on */
8849                         ret = 0;
8850                         continue;
8851                 }
8852
8853                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8854                                     struct btrfs_file_extent_item);
8855                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8856                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8857                 btrfs_release_path(path);
8858                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8859                 if (cache) {
8860                         struct extent_record *tmp;
8861                         tmp = container_of(cache, struct extent_record, cache);
8862
8863                         /*
8864                          * If we found an extent record for the bytenr for this
8865                          * particular backref then we can't add it to our
8866                          * current extent record.  We only want to add backrefs
8867                          * that don't have a corresponding extent item in the
8868                          * extent tree since they likely belong to this record
8869                          * and we need to fix it if it doesn't match bytenrs.
8870                          */
8871                         if  (tmp->found_rec)
8872                                 continue;
8873                 }
8874
8875                 dback->found_ref += 1;
8876                 dback->disk_bytenr = bytenr;
8877                 dback->bytes = bytes;
8878
8879                 /*
8880                  * Set this so the verify backref code knows not to trust the
8881                  * values in this backref.
8882                  */
8883                 back->broken = 1;
8884         }
8885
8886         return 0;
8887 }
8888
8889 /*
8890  * Record orphan data ref into corresponding root.
8891  *
8892  * Return 0 if the extent item contains data ref and recorded.
8893  * Return 1 if the extent item contains no useful data ref
8894  *   On that case, it may contains only shared_dataref or metadata backref
8895  *   or the file extent exists(this should be handled by the extent bytenr
8896  *   recovery routine)
8897  * Return <0 if something goes wrong.
8898  */
8899 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8900                                       struct extent_record *rec)
8901 {
8902         struct btrfs_key key;
8903         struct btrfs_root *dest_root;
8904         struct extent_backref *back;
8905         struct data_backref *dback;
8906         struct orphan_data_extent *orphan;
8907         struct btrfs_path path;
8908         int recorded_data_ref = 0;
8909         int ret = 0;
8910
8911         if (rec->metadata)
8912                 return 1;
8913         btrfs_init_path(&path);
8914         list_for_each_entry(back, &rec->backrefs, list) {
8915                 if (back->full_backref || !back->is_data ||
8916                     !back->found_extent_tree)
8917                         continue;
8918                 dback = to_data_backref(back);
8919                 if (dback->found_ref)
8920                         continue;
8921                 key.objectid = dback->root;
8922                 key.type = BTRFS_ROOT_ITEM_KEY;
8923                 key.offset = (u64)-1;
8924
8925                 dest_root = btrfs_read_fs_root(fs_info, &key);
8926
8927                 /* For non-exist root we just skip it */
8928                 if (IS_ERR(dest_root) || !dest_root)
8929                         continue;
8930
8931                 key.objectid = dback->owner;
8932                 key.type = BTRFS_EXTENT_DATA_KEY;
8933                 key.offset = dback->offset;
8934
8935                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8936                 btrfs_release_path(&path);
8937                 /*
8938                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8939                  * we need to record it for inode/file extent rebuild.
8940                  * For ret > 0, we record it only for file extent rebuild.
8941                  * For ret == 0, the file extent exists but only bytenr
8942                  * mismatch, let the original bytenr fix routine to handle,
8943                  * don't record it.
8944                  */
8945                 if (ret == 0)
8946                         continue;
8947                 ret = 0;
8948                 orphan = malloc(sizeof(*orphan));
8949                 if (!orphan) {
8950                         ret = -ENOMEM;
8951                         goto out;
8952                 }
8953                 INIT_LIST_HEAD(&orphan->list);
8954                 orphan->root = dback->root;
8955                 orphan->objectid = dback->owner;
8956                 orphan->offset = dback->offset;
8957                 orphan->disk_bytenr = rec->cache.start;
8958                 orphan->disk_len = rec->cache.size;
8959                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8960                 recorded_data_ref = 1;
8961         }
8962 out:
8963         btrfs_release_path(&path);
8964         if (!ret)
8965                 return !recorded_data_ref;
8966         else
8967                 return ret;
8968 }
8969
8970 /*
8971  * when an incorrect extent item is found, this will delete
8972  * all of the existing entries for it and recreate them
8973  * based on what the tree scan found.
8974  */
8975 static int fixup_extent_refs(struct btrfs_fs_info *info,
8976                              struct cache_tree *extent_cache,
8977                              struct extent_record *rec)
8978 {
8979         struct btrfs_trans_handle *trans = NULL;
8980         int ret;
8981         struct btrfs_path path;
8982         struct list_head *cur = rec->backrefs.next;
8983         struct cache_extent *cache;
8984         struct extent_backref *back;
8985         int allocated = 0;
8986         u64 flags = 0;
8987
8988         if (rec->flag_block_full_backref)
8989                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8990
8991         btrfs_init_path(&path);
8992         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8993                 /*
8994                  * Sometimes the backrefs themselves are so broken they don't
8995                  * get attached to any meaningful rec, so first go back and
8996                  * check any of our backrefs that we couldn't find and throw
8997                  * them into the list if we find the backref so that
8998                  * verify_backrefs can figure out what to do.
8999                  */
9000                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9001                 if (ret < 0)
9002                         goto out;
9003         }
9004
9005         /* step one, make sure all of the backrefs agree */
9006         ret = verify_backrefs(info, &path, rec);
9007         if (ret < 0)
9008                 goto out;
9009
9010         trans = btrfs_start_transaction(info->extent_root, 1);
9011         if (IS_ERR(trans)) {
9012                 ret = PTR_ERR(trans);
9013                 goto out;
9014         }
9015
9016         /* step two, delete all the existing records */
9017         ret = delete_extent_records(trans, info->extent_root, &path,
9018                                     rec->start);
9019
9020         if (ret < 0)
9021                 goto out;
9022
9023         /* was this block corrupt?  If so, don't add references to it */
9024         cache = lookup_cache_extent(info->corrupt_blocks,
9025                                     rec->start, rec->max_size);
9026         if (cache) {
9027                 ret = 0;
9028                 goto out;
9029         }
9030
9031         /* step three, recreate all the refs we did find */
9032         while(cur != &rec->backrefs) {
9033                 back = to_extent_backref(cur);
9034                 cur = cur->next;
9035
9036                 /*
9037                  * if we didn't find any references, don't create a
9038                  * new extent record
9039                  */
9040                 if (!back->found_ref)
9041                         continue;
9042
9043                 rec->bad_full_backref = 0;
9044                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9045                 allocated = 1;
9046
9047                 if (ret)
9048                         goto out;
9049         }
9050 out:
9051         if (trans) {
9052                 int err = btrfs_commit_transaction(trans, info->extent_root);
9053                 if (!ret)
9054                         ret = err;
9055         }
9056
9057         if (!ret)
9058                 fprintf(stderr, "Repaired extent references for %llu\n",
9059                                 (unsigned long long)rec->start);
9060
9061         btrfs_release_path(&path);
9062         return ret;
9063 }
9064
9065 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9066                               struct extent_record *rec)
9067 {
9068         struct btrfs_trans_handle *trans;
9069         struct btrfs_root *root = fs_info->extent_root;
9070         struct btrfs_path path;
9071         struct btrfs_extent_item *ei;
9072         struct btrfs_key key;
9073         u64 flags;
9074         int ret = 0;
9075
9076         key.objectid = rec->start;
9077         if (rec->metadata) {
9078                 key.type = BTRFS_METADATA_ITEM_KEY;
9079                 key.offset = rec->info_level;
9080         } else {
9081                 key.type = BTRFS_EXTENT_ITEM_KEY;
9082                 key.offset = rec->max_size;
9083         }
9084
9085         trans = btrfs_start_transaction(root, 0);
9086         if (IS_ERR(trans))
9087                 return PTR_ERR(trans);
9088
9089         btrfs_init_path(&path);
9090         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9091         if (ret < 0) {
9092                 btrfs_release_path(&path);
9093                 btrfs_commit_transaction(trans, root);
9094                 return ret;
9095         } else if (ret) {
9096                 fprintf(stderr, "Didn't find extent for %llu\n",
9097                         (unsigned long long)rec->start);
9098                 btrfs_release_path(&path);
9099                 btrfs_commit_transaction(trans, root);
9100                 return -ENOENT;
9101         }
9102
9103         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9104                             struct btrfs_extent_item);
9105         flags = btrfs_extent_flags(path.nodes[0], ei);
9106         if (rec->flag_block_full_backref) {
9107                 fprintf(stderr, "setting full backref on %llu\n",
9108                         (unsigned long long)key.objectid);
9109                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9110         } else {
9111                 fprintf(stderr, "clearing full backref on %llu\n",
9112                         (unsigned long long)key.objectid);
9113                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9114         }
9115         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9116         btrfs_mark_buffer_dirty(path.nodes[0]);
9117         btrfs_release_path(&path);
9118         ret = btrfs_commit_transaction(trans, root);
9119         if (!ret)
9120                 fprintf(stderr, "Repaired extent flags for %llu\n",
9121                                 (unsigned long long)rec->start);
9122
9123         return ret;
9124 }
9125
9126 /* right now we only prune from the extent allocation tree */
9127 static int prune_one_block(struct btrfs_trans_handle *trans,
9128                            struct btrfs_fs_info *info,
9129                            struct btrfs_corrupt_block *corrupt)
9130 {
9131         int ret;
9132         struct btrfs_path path;
9133         struct extent_buffer *eb;
9134         u64 found;
9135         int slot;
9136         int nritems;
9137         int level = corrupt->level + 1;
9138
9139         btrfs_init_path(&path);
9140 again:
9141         /* we want to stop at the parent to our busted block */
9142         path.lowest_level = level;
9143
9144         ret = btrfs_search_slot(trans, info->extent_root,
9145                                 &corrupt->key, &path, -1, 1);
9146
9147         if (ret < 0)
9148                 goto out;
9149
9150         eb = path.nodes[level];
9151         if (!eb) {
9152                 ret = -ENOENT;
9153                 goto out;
9154         }
9155
9156         /*
9157          * hopefully the search gave us the block we want to prune,
9158          * lets try that first
9159          */
9160         slot = path.slots[level];
9161         found =  btrfs_node_blockptr(eb, slot);
9162         if (found == corrupt->cache.start)
9163                 goto del_ptr;
9164
9165         nritems = btrfs_header_nritems(eb);
9166
9167         /* the search failed, lets scan this node and hope we find it */
9168         for (slot = 0; slot < nritems; slot++) {
9169                 found =  btrfs_node_blockptr(eb, slot);
9170                 if (found == corrupt->cache.start)
9171                         goto del_ptr;
9172         }
9173         /*
9174          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9175          * to this block
9176          */
9177         if (eb == info->extent_root->node) {
9178                 ret = -ENOENT;
9179                 goto out;
9180         } else {
9181                 level++;
9182                 btrfs_release_path(&path);
9183                 goto again;
9184         }
9185
9186 del_ptr:
9187         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9188         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9189
9190 out:
9191         btrfs_release_path(&path);
9192         return ret;
9193 }
9194
9195 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9196 {
9197         struct btrfs_trans_handle *trans = NULL;
9198         struct cache_extent *cache;
9199         struct btrfs_corrupt_block *corrupt;
9200
9201         while (1) {
9202                 cache = search_cache_extent(info->corrupt_blocks, 0);
9203                 if (!cache)
9204                         break;
9205                 if (!trans) {
9206                         trans = btrfs_start_transaction(info->extent_root, 1);
9207                         if (IS_ERR(trans))
9208                                 return PTR_ERR(trans);
9209                 }
9210                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9211                 prune_one_block(trans, info, corrupt);
9212                 remove_cache_extent(info->corrupt_blocks, cache);
9213         }
9214         if (trans)
9215                 return btrfs_commit_transaction(trans, info->extent_root);
9216         return 0;
9217 }
9218
9219 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9220 {
9221         struct btrfs_block_group_cache *cache;
9222         u64 start, end;
9223         int ret;
9224
9225         while (1) {
9226                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9227                                             &start, &end, EXTENT_DIRTY);
9228                 if (ret)
9229                         break;
9230                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9231         }
9232
9233         start = 0;
9234         while (1) {
9235                 cache = btrfs_lookup_first_block_group(fs_info, start);
9236                 if (!cache)
9237                         break;
9238                 if (cache->cached)
9239                         cache->cached = 0;
9240                 start = cache->key.objectid + cache->key.offset;
9241         }
9242 }
9243
9244 static int check_extent_refs(struct btrfs_root *root,
9245                              struct cache_tree *extent_cache)
9246 {
9247         struct extent_record *rec;
9248         struct cache_extent *cache;
9249         int ret = 0;
9250         int had_dups = 0;
9251
9252         if (repair) {
9253                 /*
9254                  * if we're doing a repair, we have to make sure
9255                  * we don't allocate from the problem extents.
9256                  * In the worst case, this will be all the
9257                  * extents in the FS
9258                  */
9259                 cache = search_cache_extent(extent_cache, 0);
9260                 while(cache) {
9261                         rec = container_of(cache, struct extent_record, cache);
9262                         set_extent_dirty(root->fs_info->excluded_extents,
9263                                          rec->start,
9264                                          rec->start + rec->max_size - 1);
9265                         cache = next_cache_extent(cache);
9266                 }
9267
9268                 /* pin down all the corrupted blocks too */
9269                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9270                 while(cache) {
9271                         set_extent_dirty(root->fs_info->excluded_extents,
9272                                          cache->start,
9273                                          cache->start + cache->size - 1);
9274                         cache = next_cache_extent(cache);
9275                 }
9276                 prune_corrupt_blocks(root->fs_info);
9277                 reset_cached_block_groups(root->fs_info);
9278         }
9279
9280         reset_cached_block_groups(root->fs_info);
9281
9282         /*
9283          * We need to delete any duplicate entries we find first otherwise we
9284          * could mess up the extent tree when we have backrefs that actually
9285          * belong to a different extent item and not the weird duplicate one.
9286          */
9287         while (repair && !list_empty(&duplicate_extents)) {
9288                 rec = to_extent_record(duplicate_extents.next);
9289                 list_del_init(&rec->list);
9290
9291                 /* Sometimes we can find a backref before we find an actual
9292                  * extent, so we need to process it a little bit to see if there
9293                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9294                  * if this is a backref screwup.  If we need to delete stuff
9295                  * process_duplicates() will return 0, otherwise it will return
9296                  * 1 and we
9297                  */
9298                 if (process_duplicates(extent_cache, rec))
9299                         continue;
9300                 ret = delete_duplicate_records(root, rec);
9301                 if (ret < 0)
9302                         return ret;
9303                 /*
9304                  * delete_duplicate_records will return the number of entries
9305                  * deleted, so if it's greater than 0 then we know we actually
9306                  * did something and we need to remove.
9307                  */
9308                 if (ret)
9309                         had_dups = 1;
9310         }
9311
9312         if (had_dups)
9313                 return -EAGAIN;
9314
9315         while(1) {
9316                 int cur_err = 0;
9317                 int fix = 0;
9318
9319                 cache = search_cache_extent(extent_cache, 0);
9320                 if (!cache)
9321                         break;
9322                 rec = container_of(cache, struct extent_record, cache);
9323                 if (rec->num_duplicates) {
9324                         fprintf(stderr, "extent item %llu has multiple extent "
9325                                 "items\n", (unsigned long long)rec->start);
9326                         cur_err = 1;
9327                 }
9328
9329                 if (rec->refs != rec->extent_item_refs) {
9330                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9331                                 (unsigned long long)rec->start,
9332                                 (unsigned long long)rec->nr);
9333                         fprintf(stderr, "extent item %llu, found %llu\n",
9334                                 (unsigned long long)rec->extent_item_refs,
9335                                 (unsigned long long)rec->refs);
9336                         ret = record_orphan_data_extents(root->fs_info, rec);
9337                         if (ret < 0)
9338                                 goto repair_abort;
9339                         fix = ret;
9340                         cur_err = 1;
9341                 }
9342                 if (all_backpointers_checked(rec, 1)) {
9343                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9344                                 (unsigned long long)rec->start,
9345                                 (unsigned long long)rec->nr);
9346                         fix = 1;
9347                         cur_err = 1;
9348                 }
9349                 if (!rec->owner_ref_checked) {
9350                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9351                                 (unsigned long long)rec->start,
9352                                 (unsigned long long)rec->nr);
9353                         fix = 1;
9354                         cur_err = 1;
9355                 }
9356
9357                 if (repair && fix) {
9358                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9359                         if (ret)
9360                                 goto repair_abort;
9361                 }
9362
9363
9364                 if (rec->bad_full_backref) {
9365                         fprintf(stderr, "bad full backref, on [%llu]\n",
9366                                 (unsigned long long)rec->start);
9367                         if (repair) {
9368                                 ret = fixup_extent_flags(root->fs_info, rec);
9369                                 if (ret)
9370                                         goto repair_abort;
9371                                 fix = 1;
9372                         }
9373                         cur_err = 1;
9374                 }
9375                 /*
9376                  * Although it's not a extent ref's problem, we reuse this
9377                  * routine for error reporting.
9378                  * No repair function yet.
9379                  */
9380                 if (rec->crossing_stripes) {
9381                         fprintf(stderr,
9382                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9383                                 rec->start, rec->start + rec->max_size);
9384                         cur_err = 1;
9385                 }
9386
9387                 if (rec->wrong_chunk_type) {
9388                         fprintf(stderr,
9389                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9390                                 rec->start, rec->start + rec->max_size);
9391                         cur_err = 1;
9392                 }
9393
9394                 remove_cache_extent(extent_cache, cache);
9395                 free_all_extent_backrefs(rec);
9396                 if (!init_extent_tree && repair && (!cur_err || fix))
9397                         clear_extent_dirty(root->fs_info->excluded_extents,
9398                                            rec->start,
9399                                            rec->start + rec->max_size - 1);
9400                 free(rec);
9401         }
9402 repair_abort:
9403         if (repair) {
9404                 if (ret && ret != -EAGAIN) {
9405                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9406                         exit(1);
9407                 } else if (!ret) {
9408                         struct btrfs_trans_handle *trans;
9409
9410                         root = root->fs_info->extent_root;
9411                         trans = btrfs_start_transaction(root, 1);
9412                         if (IS_ERR(trans)) {
9413                                 ret = PTR_ERR(trans);
9414                                 goto repair_abort;
9415                         }
9416
9417                         btrfs_fix_block_accounting(trans, root);
9418                         ret = btrfs_commit_transaction(trans, root);
9419                         if (ret)
9420                                 goto repair_abort;
9421                 }
9422                 return ret;
9423         }
9424         return 0;
9425 }
9426
9427 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9428 {
9429         u64 stripe_size;
9430
9431         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9432                 stripe_size = length;
9433                 stripe_size /= num_stripes;
9434         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9435                 stripe_size = length * 2;
9436                 stripe_size /= num_stripes;
9437         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9438                 stripe_size = length;
9439                 stripe_size /= (num_stripes - 1);
9440         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9441                 stripe_size = length;
9442                 stripe_size /= (num_stripes - 2);
9443         } else {
9444                 stripe_size = length;
9445         }
9446         return stripe_size;
9447 }
9448
9449 /*
9450  * Check the chunk with its block group/dev list ref:
9451  * Return 0 if all refs seems valid.
9452  * Return 1 if part of refs seems valid, need later check for rebuild ref
9453  * like missing block group and needs to search extent tree to rebuild them.
9454  * Return -1 if essential refs are missing and unable to rebuild.
9455  */
9456 static int check_chunk_refs(struct chunk_record *chunk_rec,
9457                             struct block_group_tree *block_group_cache,
9458                             struct device_extent_tree *dev_extent_cache,
9459                             int silent)
9460 {
9461         struct cache_extent *block_group_item;
9462         struct block_group_record *block_group_rec;
9463         struct cache_extent *dev_extent_item;
9464         struct device_extent_record *dev_extent_rec;
9465         u64 devid;
9466         u64 offset;
9467         u64 length;
9468         int metadump_v2 = 0;
9469         int i;
9470         int ret = 0;
9471
9472         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9473                                                chunk_rec->offset,
9474                                                chunk_rec->length);
9475         if (block_group_item) {
9476                 block_group_rec = container_of(block_group_item,
9477                                                struct block_group_record,
9478                                                cache);
9479                 if (chunk_rec->length != block_group_rec->offset ||
9480                     chunk_rec->offset != block_group_rec->objectid ||
9481                     (!metadump_v2 &&
9482                      chunk_rec->type_flags != block_group_rec->flags)) {
9483                         if (!silent)
9484                                 fprintf(stderr,
9485                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9486                                         chunk_rec->objectid,
9487                                         chunk_rec->type,
9488                                         chunk_rec->offset,
9489                                         chunk_rec->length,
9490                                         chunk_rec->offset,
9491                                         chunk_rec->type_flags,
9492                                         block_group_rec->objectid,
9493                                         block_group_rec->type,
9494                                         block_group_rec->offset,
9495                                         block_group_rec->offset,
9496                                         block_group_rec->objectid,
9497                                         block_group_rec->flags);
9498                         ret = -1;
9499                 } else {
9500                         list_del_init(&block_group_rec->list);
9501                         chunk_rec->bg_rec = block_group_rec;
9502                 }
9503         } else {
9504                 if (!silent)
9505                         fprintf(stderr,
9506                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9507                                 chunk_rec->objectid,
9508                                 chunk_rec->type,
9509                                 chunk_rec->offset,
9510                                 chunk_rec->length,
9511                                 chunk_rec->offset,
9512                                 chunk_rec->type_flags);
9513                 ret = 1;
9514         }
9515
9516         if (metadump_v2)
9517                 return ret;
9518
9519         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9520                                     chunk_rec->num_stripes);
9521         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9522                 devid = chunk_rec->stripes[i].devid;
9523                 offset = chunk_rec->stripes[i].offset;
9524                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9525                                                        devid, offset, length);
9526                 if (dev_extent_item) {
9527                         dev_extent_rec = container_of(dev_extent_item,
9528                                                 struct device_extent_record,
9529                                                 cache);
9530                         if (dev_extent_rec->objectid != devid ||
9531                             dev_extent_rec->offset != offset ||
9532                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9533                             dev_extent_rec->length != length) {
9534                                 if (!silent)
9535                                         fprintf(stderr,
9536                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9537                                                 chunk_rec->objectid,
9538                                                 chunk_rec->type,
9539                                                 chunk_rec->offset,
9540                                                 chunk_rec->stripes[i].devid,
9541                                                 chunk_rec->stripes[i].offset,
9542                                                 dev_extent_rec->objectid,
9543                                                 dev_extent_rec->offset,
9544                                                 dev_extent_rec->length);
9545                                 ret = -1;
9546                         } else {
9547                                 list_move(&dev_extent_rec->chunk_list,
9548                                           &chunk_rec->dextents);
9549                         }
9550                 } else {
9551                         if (!silent)
9552                                 fprintf(stderr,
9553                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9554                                         chunk_rec->objectid,
9555                                         chunk_rec->type,
9556                                         chunk_rec->offset,
9557                                         chunk_rec->stripes[i].devid,
9558                                         chunk_rec->stripes[i].offset);
9559                         ret = -1;
9560                 }
9561         }
9562         return ret;
9563 }
9564
9565 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9566 int check_chunks(struct cache_tree *chunk_cache,
9567                  struct block_group_tree *block_group_cache,
9568                  struct device_extent_tree *dev_extent_cache,
9569                  struct list_head *good, struct list_head *bad,
9570                  struct list_head *rebuild, int silent)
9571 {
9572         struct cache_extent *chunk_item;
9573         struct chunk_record *chunk_rec;
9574         struct block_group_record *bg_rec;
9575         struct device_extent_record *dext_rec;
9576         int err;
9577         int ret = 0;
9578
9579         chunk_item = first_cache_extent(chunk_cache);
9580         while (chunk_item) {
9581                 chunk_rec = container_of(chunk_item, struct chunk_record,
9582                                          cache);
9583                 err = check_chunk_refs(chunk_rec, block_group_cache,
9584                                        dev_extent_cache, silent);
9585                 if (err < 0)
9586                         ret = err;
9587                 if (err == 0 && good)
9588                         list_add_tail(&chunk_rec->list, good);
9589                 if (err > 0 && rebuild)
9590                         list_add_tail(&chunk_rec->list, rebuild);
9591                 if (err < 0 && bad)
9592                         list_add_tail(&chunk_rec->list, bad);
9593                 chunk_item = next_cache_extent(chunk_item);
9594         }
9595
9596         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9597                 if (!silent)
9598                         fprintf(stderr,
9599                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9600                                 bg_rec->objectid,
9601                                 bg_rec->offset,
9602                                 bg_rec->flags);
9603                 if (!ret)
9604                         ret = 1;
9605         }
9606
9607         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9608                             chunk_list) {
9609                 if (!silent)
9610                         fprintf(stderr,
9611                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9612                                 dext_rec->objectid,
9613                                 dext_rec->offset,
9614                                 dext_rec->length);
9615                 if (!ret)
9616                         ret = 1;
9617         }
9618         return ret;
9619 }
9620
9621
9622 static int check_device_used(struct device_record *dev_rec,
9623                              struct device_extent_tree *dext_cache)
9624 {
9625         struct cache_extent *cache;
9626         struct device_extent_record *dev_extent_rec;
9627         u64 total_byte = 0;
9628
9629         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9630         while (cache) {
9631                 dev_extent_rec = container_of(cache,
9632                                               struct device_extent_record,
9633                                               cache);
9634                 if (dev_extent_rec->objectid != dev_rec->devid)
9635                         break;
9636
9637                 list_del_init(&dev_extent_rec->device_list);
9638                 total_byte += dev_extent_rec->length;
9639                 cache = next_cache_extent(cache);
9640         }
9641
9642         if (total_byte != dev_rec->byte_used) {
9643                 fprintf(stderr,
9644                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9645                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9646                         dev_rec->type, dev_rec->offset);
9647                 return -1;
9648         } else {
9649                 return 0;
9650         }
9651 }
9652
9653 /* check btrfs_dev_item -> btrfs_dev_extent */
9654 static int check_devices(struct rb_root *dev_cache,
9655                          struct device_extent_tree *dev_extent_cache)
9656 {
9657         struct rb_node *dev_node;
9658         struct device_record *dev_rec;
9659         struct device_extent_record *dext_rec;
9660         int err;
9661         int ret = 0;
9662
9663         dev_node = rb_first(dev_cache);
9664         while (dev_node) {
9665                 dev_rec = container_of(dev_node, struct device_record, node);
9666                 err = check_device_used(dev_rec, dev_extent_cache);
9667                 if (err)
9668                         ret = err;
9669
9670                 dev_node = rb_next(dev_node);
9671         }
9672         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9673                             device_list) {
9674                 fprintf(stderr,
9675                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9676                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9677                 if (!ret)
9678                         ret = 1;
9679         }
9680         return ret;
9681 }
9682
9683 static int add_root_item_to_list(struct list_head *head,
9684                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9685                                   u8 level, u8 drop_level,
9686                                   int level_size, struct btrfs_key *drop_key)
9687 {
9688
9689         struct root_item_record *ri_rec;
9690         ri_rec = malloc(sizeof(*ri_rec));
9691         if (!ri_rec)
9692                 return -ENOMEM;
9693         ri_rec->bytenr = bytenr;
9694         ri_rec->objectid = objectid;
9695         ri_rec->level = level;
9696         ri_rec->level_size = level_size;
9697         ri_rec->drop_level = drop_level;
9698         ri_rec->last_snapshot = last_snapshot;
9699         if (drop_key)
9700                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9701         list_add_tail(&ri_rec->list, head);
9702
9703         return 0;
9704 }
9705
9706 static void free_root_item_list(struct list_head *list)
9707 {
9708         struct root_item_record *ri_rec;
9709
9710         while (!list_empty(list)) {
9711                 ri_rec = list_first_entry(list, struct root_item_record,
9712                                           list);
9713                 list_del_init(&ri_rec->list);
9714                 free(ri_rec);
9715         }
9716 }
9717
9718 static int deal_root_from_list(struct list_head *list,
9719                                struct btrfs_root *root,
9720                                struct block_info *bits,
9721                                int bits_nr,
9722                                struct cache_tree *pending,
9723                                struct cache_tree *seen,
9724                                struct cache_tree *reada,
9725                                struct cache_tree *nodes,
9726                                struct cache_tree *extent_cache,
9727                                struct cache_tree *chunk_cache,
9728                                struct rb_root *dev_cache,
9729                                struct block_group_tree *block_group_cache,
9730                                struct device_extent_tree *dev_extent_cache)
9731 {
9732         int ret = 0;
9733         u64 last;
9734
9735         while (!list_empty(list)) {
9736                 struct root_item_record *rec;
9737                 struct extent_buffer *buf;
9738                 rec = list_entry(list->next,
9739                                  struct root_item_record, list);
9740                 last = 0;
9741                 buf = read_tree_block(root->fs_info->tree_root,
9742                                       rec->bytenr, rec->level_size, 0);
9743                 if (!extent_buffer_uptodate(buf)) {
9744                         free_extent_buffer(buf);
9745                         ret = -EIO;
9746                         break;
9747                 }
9748                 ret = add_root_to_pending(buf, extent_cache, pending,
9749                                     seen, nodes, rec->objectid);
9750                 if (ret < 0)
9751                         break;
9752                 /*
9753                  * To rebuild extent tree, we need deal with snapshot
9754                  * one by one, otherwise we deal with node firstly which
9755                  * can maximize readahead.
9756                  */
9757                 while (1) {
9758                         ret = run_next_block(root, bits, bits_nr, &last,
9759                                              pending, seen, reada, nodes,
9760                                              extent_cache, chunk_cache,
9761                                              dev_cache, block_group_cache,
9762                                              dev_extent_cache, rec);
9763                         if (ret != 0)
9764                                 break;
9765                 }
9766                 free_extent_buffer(buf);
9767                 list_del(&rec->list);
9768                 free(rec);
9769                 if (ret < 0)
9770                         break;
9771         }
9772         while (ret >= 0) {
9773                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9774                                      reada, nodes, extent_cache, chunk_cache,
9775                                      dev_cache, block_group_cache,
9776                                      dev_extent_cache, NULL);
9777                 if (ret != 0) {
9778                         if (ret > 0)
9779                                 ret = 0;
9780                         break;
9781                 }
9782         }
9783         return ret;
9784 }
9785
9786 static int check_chunks_and_extents(struct btrfs_root *root)
9787 {
9788         struct rb_root dev_cache;
9789         struct cache_tree chunk_cache;
9790         struct block_group_tree block_group_cache;
9791         struct device_extent_tree dev_extent_cache;
9792         struct cache_tree extent_cache;
9793         struct cache_tree seen;
9794         struct cache_tree pending;
9795         struct cache_tree reada;
9796         struct cache_tree nodes;
9797         struct extent_io_tree excluded_extents;
9798         struct cache_tree corrupt_blocks;
9799         struct btrfs_path path;
9800         struct btrfs_key key;
9801         struct btrfs_key found_key;
9802         int ret, err = 0;
9803         struct block_info *bits;
9804         int bits_nr;
9805         struct extent_buffer *leaf;
9806         int slot;
9807         struct btrfs_root_item ri;
9808         struct list_head dropping_trees;
9809         struct list_head normal_trees;
9810         struct btrfs_root *root1;
9811         u64 objectid;
9812         u32 level_size;
9813         u8 level;
9814
9815         dev_cache = RB_ROOT;
9816         cache_tree_init(&chunk_cache);
9817         block_group_tree_init(&block_group_cache);
9818         device_extent_tree_init(&dev_extent_cache);
9819
9820         cache_tree_init(&extent_cache);
9821         cache_tree_init(&seen);
9822         cache_tree_init(&pending);
9823         cache_tree_init(&nodes);
9824         cache_tree_init(&reada);
9825         cache_tree_init(&corrupt_blocks);
9826         extent_io_tree_init(&excluded_extents);
9827         INIT_LIST_HEAD(&dropping_trees);
9828         INIT_LIST_HEAD(&normal_trees);
9829
9830         if (repair) {
9831                 root->fs_info->excluded_extents = &excluded_extents;
9832                 root->fs_info->fsck_extent_cache = &extent_cache;
9833                 root->fs_info->free_extent_hook = free_extent_hook;
9834                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9835         }
9836
9837         bits_nr = 1024;
9838         bits = malloc(bits_nr * sizeof(struct block_info));
9839         if (!bits) {
9840                 perror("malloc");
9841                 exit(1);
9842         }
9843
9844         if (ctx.progress_enabled) {
9845                 ctx.tp = TASK_EXTENTS;
9846                 task_start(ctx.info);
9847         }
9848
9849 again:
9850         root1 = root->fs_info->tree_root;
9851         level = btrfs_header_level(root1->node);
9852         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9853                                     root1->node->start, 0, level, 0,
9854                                     root1->nodesize, NULL);
9855         if (ret < 0)
9856                 goto out;
9857         root1 = root->fs_info->chunk_root;
9858         level = btrfs_header_level(root1->node);
9859         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9860                                     root1->node->start, 0, level, 0,
9861                                     root1->nodesize, NULL);
9862         if (ret < 0)
9863                 goto out;
9864         btrfs_init_path(&path);
9865         key.offset = 0;
9866         key.objectid = 0;
9867         key.type = BTRFS_ROOT_ITEM_KEY;
9868         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9869                                         &key, &path, 0, 0);
9870         if (ret < 0)
9871                 goto out;
9872         while(1) {
9873                 leaf = path.nodes[0];
9874                 slot = path.slots[0];
9875                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9876                         ret = btrfs_next_leaf(root, &path);
9877                         if (ret != 0)
9878                                 break;
9879                         leaf = path.nodes[0];
9880                         slot = path.slots[0];
9881                 }
9882                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9883                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9884                         unsigned long offset;
9885                         u64 last_snapshot;
9886
9887                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9888                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9889                         last_snapshot = btrfs_root_last_snapshot(&ri);
9890                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9891                                 level = btrfs_root_level(&ri);
9892                                 level_size = root->nodesize;
9893                                 ret = add_root_item_to_list(&normal_trees,
9894                                                 found_key.objectid,
9895                                                 btrfs_root_bytenr(&ri),
9896                                                 last_snapshot, level,
9897                                                 0, level_size, NULL);
9898                                 if (ret < 0)
9899                                         goto out;
9900                         } else {
9901                                 level = btrfs_root_level(&ri);
9902                                 level_size = root->nodesize;
9903                                 objectid = found_key.objectid;
9904                                 btrfs_disk_key_to_cpu(&found_key,
9905                                                       &ri.drop_progress);
9906                                 ret = add_root_item_to_list(&dropping_trees,
9907                                                 objectid,
9908                                                 btrfs_root_bytenr(&ri),
9909                                                 last_snapshot, level,
9910                                                 ri.drop_level,
9911                                                 level_size, &found_key);
9912                                 if (ret < 0)
9913                                         goto out;
9914                         }
9915                 }
9916                 path.slots[0]++;
9917         }
9918         btrfs_release_path(&path);
9919
9920         /*
9921          * check_block can return -EAGAIN if it fixes something, please keep
9922          * this in mind when dealing with return values from these functions, if
9923          * we get -EAGAIN we want to fall through and restart the loop.
9924          */
9925         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9926                                   &seen, &reada, &nodes, &extent_cache,
9927                                   &chunk_cache, &dev_cache, &block_group_cache,
9928                                   &dev_extent_cache);
9929         if (ret < 0) {
9930                 if (ret == -EAGAIN)
9931                         goto loop;
9932                 goto out;
9933         }
9934         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9935                                   &pending, &seen, &reada, &nodes,
9936                                   &extent_cache, &chunk_cache, &dev_cache,
9937                                   &block_group_cache, &dev_extent_cache);
9938         if (ret < 0) {
9939                 if (ret == -EAGAIN)
9940                         goto loop;
9941                 goto out;
9942         }
9943
9944         ret = check_chunks(&chunk_cache, &block_group_cache,
9945                            &dev_extent_cache, NULL, NULL, NULL, 0);
9946         if (ret) {
9947                 if (ret == -EAGAIN)
9948                         goto loop;
9949                 err = ret;
9950         }
9951
9952         ret = check_extent_refs(root, &extent_cache);
9953         if (ret < 0) {
9954                 if (ret == -EAGAIN)
9955                         goto loop;
9956                 goto out;
9957         }
9958
9959         ret = check_devices(&dev_cache, &dev_extent_cache);
9960         if (ret && err)
9961                 ret = err;
9962
9963 out:
9964         task_stop(ctx.info);
9965         if (repair) {
9966                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9967                 extent_io_tree_cleanup(&excluded_extents);
9968                 root->fs_info->fsck_extent_cache = NULL;
9969                 root->fs_info->free_extent_hook = NULL;
9970                 root->fs_info->corrupt_blocks = NULL;
9971                 root->fs_info->excluded_extents = NULL;
9972         }
9973         free(bits);
9974         free_chunk_cache_tree(&chunk_cache);
9975         free_device_cache_tree(&dev_cache);
9976         free_block_group_tree(&block_group_cache);
9977         free_device_extent_tree(&dev_extent_cache);
9978         free_extent_cache_tree(&seen);
9979         free_extent_cache_tree(&pending);
9980         free_extent_cache_tree(&reada);
9981         free_extent_cache_tree(&nodes);
9982         free_root_item_list(&normal_trees);
9983         free_root_item_list(&dropping_trees);
9984         return ret;
9985 loop:
9986         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9987         free_extent_cache_tree(&seen);
9988         free_extent_cache_tree(&pending);
9989         free_extent_cache_tree(&reada);
9990         free_extent_cache_tree(&nodes);
9991         free_chunk_cache_tree(&chunk_cache);
9992         free_block_group_tree(&block_group_cache);
9993         free_device_cache_tree(&dev_cache);
9994         free_device_extent_tree(&dev_extent_cache);
9995         free_extent_record_cache(&extent_cache);
9996         free_root_item_list(&normal_trees);
9997         free_root_item_list(&dropping_trees);
9998         extent_io_tree_cleanup(&excluded_extents);
9999         goto again;
10000 }
10001
10002 /*
10003  * Check backrefs of a tree block given by @bytenr or @eb.
10004  *
10005  * @root:       the root containing the @bytenr or @eb
10006  * @eb:         tree block extent buffer, can be NULL
10007  * @bytenr:     bytenr of the tree block to search
10008  * @level:      tree level of the tree block
10009  * @owner:      owner of the tree block
10010  *
10011  * Return >0 for any error found and output error message
10012  * Return 0 for no error found
10013  */
10014 static int check_tree_block_ref(struct btrfs_root *root,
10015                                 struct extent_buffer *eb, u64 bytenr,
10016                                 int level, u64 owner)
10017 {
10018         struct btrfs_key key;
10019         struct btrfs_root *extent_root = root->fs_info->extent_root;
10020         struct btrfs_path path;
10021         struct btrfs_extent_item *ei;
10022         struct btrfs_extent_inline_ref *iref;
10023         struct extent_buffer *leaf;
10024         unsigned long end;
10025         unsigned long ptr;
10026         int slot;
10027         int skinny_level;
10028         int type;
10029         u32 nodesize = root->nodesize;
10030         u32 item_size;
10031         u64 offset;
10032         int tree_reloc_root = 0;
10033         int found_ref = 0;
10034         int err = 0;
10035         int ret;
10036
10037         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10038             btrfs_header_bytenr(root->node) == bytenr)
10039                 tree_reloc_root = 1;
10040
10041         btrfs_init_path(&path);
10042         key.objectid = bytenr;
10043         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10044                 key.type = BTRFS_METADATA_ITEM_KEY;
10045         else
10046                 key.type = BTRFS_EXTENT_ITEM_KEY;
10047         key.offset = (u64)-1;
10048
10049         /* Search for the backref in extent tree */
10050         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10051         if (ret < 0) {
10052                 err |= BACKREF_MISSING;
10053                 goto out;
10054         }
10055         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10056         if (ret) {
10057                 err |= BACKREF_MISSING;
10058                 goto out;
10059         }
10060
10061         leaf = path.nodes[0];
10062         slot = path.slots[0];
10063         btrfs_item_key_to_cpu(leaf, &key, slot);
10064
10065         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10066
10067         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10068                 skinny_level = (int)key.offset;
10069                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10070         } else {
10071                 struct btrfs_tree_block_info *info;
10072
10073                 info = (struct btrfs_tree_block_info *)(ei + 1);
10074                 skinny_level = btrfs_tree_block_level(leaf, info);
10075                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10076         }
10077
10078         if (eb) {
10079                 u64 header_gen;
10080                 u64 extent_gen;
10081
10082                 if (!(btrfs_extent_flags(leaf, ei) &
10083                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10084                         error(
10085                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10086                                 key.objectid, nodesize,
10087                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10088                         err = BACKREF_MISMATCH;
10089                 }
10090                 header_gen = btrfs_header_generation(eb);
10091                 extent_gen = btrfs_extent_generation(leaf, ei);
10092                 if (header_gen != extent_gen) {
10093                         error(
10094         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10095                                 key.objectid, nodesize, header_gen,
10096                                 extent_gen);
10097                         err = BACKREF_MISMATCH;
10098                 }
10099                 if (level != skinny_level) {
10100                         error(
10101                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10102                                 key.objectid, nodesize, level, skinny_level);
10103                         err = BACKREF_MISMATCH;
10104                 }
10105                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10106                         error(
10107                         "extent[%llu %u] is referred by other roots than %llu",
10108                                 key.objectid, nodesize, root->objectid);
10109                         err = BACKREF_MISMATCH;
10110                 }
10111         }
10112
10113         /*
10114          * Iterate the extent/metadata item to find the exact backref
10115          */
10116         item_size = btrfs_item_size_nr(leaf, slot);
10117         ptr = (unsigned long)iref;
10118         end = (unsigned long)ei + item_size;
10119         while (ptr < end) {
10120                 iref = (struct btrfs_extent_inline_ref *)ptr;
10121                 type = btrfs_extent_inline_ref_type(leaf, iref);
10122                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10123
10124                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10125                         (offset == root->objectid || offset == owner)) {
10126                         found_ref = 1;
10127                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10128                         /*
10129                          * Backref of tree reloc root points to itself, no need
10130                          * to check backref any more.
10131                          */
10132                         if (tree_reloc_root)
10133                                 found_ref = 1;
10134                         else
10135                         /* Check if the backref points to valid referencer */
10136                                 found_ref = !check_tree_block_ref(root, NULL,
10137                                                 offset, level + 1, owner);
10138                 }
10139
10140                 if (found_ref)
10141                         break;
10142                 ptr += btrfs_extent_inline_ref_size(type);
10143         }
10144
10145         /*
10146          * Inlined extent item doesn't have what we need, check
10147          * TREE_BLOCK_REF_KEY
10148          */
10149         if (!found_ref) {
10150                 btrfs_release_path(&path);
10151                 key.objectid = bytenr;
10152                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10153                 key.offset = root->objectid;
10154
10155                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10156                 if (!ret)
10157                         found_ref = 1;
10158         }
10159         if (!found_ref)
10160                 err |= BACKREF_MISSING;
10161 out:
10162         btrfs_release_path(&path);
10163         if (eb && (err & BACKREF_MISSING))
10164                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10165                         bytenr, nodesize, owner, level);
10166         return err;
10167 }
10168
10169 /*
10170  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10171  *
10172  * Return >0 any error found and output error message
10173  * Return 0 for no error found
10174  */
10175 static int check_extent_data_item(struct btrfs_root *root,
10176                                   struct extent_buffer *eb, int slot)
10177 {
10178         struct btrfs_file_extent_item *fi;
10179         struct btrfs_path path;
10180         struct btrfs_root *extent_root = root->fs_info->extent_root;
10181         struct btrfs_key fi_key;
10182         struct btrfs_key dbref_key;
10183         struct extent_buffer *leaf;
10184         struct btrfs_extent_item *ei;
10185         struct btrfs_extent_inline_ref *iref;
10186         struct btrfs_extent_data_ref *dref;
10187         u64 owner;
10188         u64 disk_bytenr;
10189         u64 disk_num_bytes;
10190         u64 extent_num_bytes;
10191         u64 extent_flags;
10192         u32 item_size;
10193         unsigned long end;
10194         unsigned long ptr;
10195         int type;
10196         u64 ref_root;
10197         int found_dbackref = 0;
10198         int err = 0;
10199         int ret;
10200
10201         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10202         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10203
10204         /* Nothing to check for hole and inline data extents */
10205         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10206             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10207                 return 0;
10208
10209         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10210         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10211         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10212
10213         /* Check unaligned disk_num_bytes and num_bytes */
10214         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10215                 error(
10216 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10217                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10218                         root->sectorsize);
10219                 err |= BYTES_UNALIGNED;
10220         } else {
10221                 data_bytes_allocated += disk_num_bytes;
10222         }
10223         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10224                 error(
10225 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10226                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10227                         root->sectorsize);
10228                 err |= BYTES_UNALIGNED;
10229         } else {
10230                 data_bytes_referenced += extent_num_bytes;
10231         }
10232         owner = btrfs_header_owner(eb);
10233
10234         /* Check the extent item of the file extent in extent tree */
10235         btrfs_init_path(&path);
10236         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10237         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10238         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10239
10240         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10241         if (ret)
10242                 goto out;
10243
10244         leaf = path.nodes[0];
10245         slot = path.slots[0];
10246         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10247
10248         extent_flags = btrfs_extent_flags(leaf, ei);
10249
10250         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10251                 error(
10252                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10253                     disk_bytenr, disk_num_bytes,
10254                     BTRFS_EXTENT_FLAG_DATA);
10255                 err |= BACKREF_MISMATCH;
10256         }
10257
10258         /* Check data backref inside that extent item */
10259         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10260         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10261         ptr = (unsigned long)iref;
10262         end = (unsigned long)ei + item_size;
10263         while (ptr < end) {
10264                 iref = (struct btrfs_extent_inline_ref *)ptr;
10265                 type = btrfs_extent_inline_ref_type(leaf, iref);
10266                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10267
10268                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10269                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10270                         if (ref_root == owner || ref_root == root->objectid)
10271                                 found_dbackref = 1;
10272                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10273                         found_dbackref = !check_tree_block_ref(root, NULL,
10274                                 btrfs_extent_inline_ref_offset(leaf, iref),
10275                                 0, owner);
10276                 }
10277
10278                 if (found_dbackref)
10279                         break;
10280                 ptr += btrfs_extent_inline_ref_size(type);
10281         }
10282
10283         if (!found_dbackref) {
10284                 btrfs_release_path(&path);
10285
10286                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10287                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10288                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10289                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10290                                 fi_key.objectid, fi_key.offset);
10291
10292                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10293                                         &dbref_key, &path, 0, 0);
10294                 if (!ret) {
10295                         found_dbackref = 1;
10296                         goto out;
10297                 }
10298
10299                 btrfs_release_path(&path);
10300
10301                 /*
10302                  * Neither inlined nor EXTENT_DATA_REF found, try
10303                  * SHARED_DATA_REF as last chance.
10304                  */
10305                 dbref_key.objectid = disk_bytenr;
10306                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10307                 dbref_key.offset = eb->start;
10308
10309                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10310                                         &dbref_key, &path, 0, 0);
10311                 if (!ret) {
10312                         found_dbackref = 1;
10313                         goto out;
10314                 }
10315         }
10316
10317 out:
10318         if (!found_dbackref)
10319                 err |= BACKREF_MISSING;
10320         btrfs_release_path(&path);
10321         if (err & BACKREF_MISSING) {
10322                 error("data extent[%llu %llu] backref lost",
10323                       disk_bytenr, disk_num_bytes);
10324         }
10325         return err;
10326 }
10327
10328 /*
10329  * Get real tree block level for the case like shared block
10330  * Return >= 0 as tree level
10331  * Return <0 for error
10332  */
10333 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10334 {
10335         struct extent_buffer *eb;
10336         struct btrfs_path path;
10337         struct btrfs_key key;
10338         struct btrfs_extent_item *ei;
10339         u64 flags;
10340         u64 transid;
10341         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10342         u8 backref_level;
10343         u8 header_level;
10344         int ret;
10345
10346         /* Search extent tree for extent generation and level */
10347         key.objectid = bytenr;
10348         key.type = BTRFS_METADATA_ITEM_KEY;
10349         key.offset = (u64)-1;
10350
10351         btrfs_init_path(&path);
10352         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10353         if (ret < 0)
10354                 goto release_out;
10355         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10356         if (ret < 0)
10357                 goto release_out;
10358         if (ret > 0) {
10359                 ret = -ENOENT;
10360                 goto release_out;
10361         }
10362
10363         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10364         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10365                             struct btrfs_extent_item);
10366         flags = btrfs_extent_flags(path.nodes[0], ei);
10367         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10368                 ret = -ENOENT;
10369                 goto release_out;
10370         }
10371
10372         /* Get transid for later read_tree_block() check */
10373         transid = btrfs_extent_generation(path.nodes[0], ei);
10374
10375         /* Get backref level as one source */
10376         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10377                 backref_level = key.offset;
10378         } else {
10379                 struct btrfs_tree_block_info *info;
10380
10381                 info = (struct btrfs_tree_block_info *)(ei + 1);
10382                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10383         }
10384         btrfs_release_path(&path);
10385
10386         /* Get level from tree block as an alternative source */
10387         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10388         if (!extent_buffer_uptodate(eb)) {
10389                 free_extent_buffer(eb);
10390                 return -EIO;
10391         }
10392         header_level = btrfs_header_level(eb);
10393         free_extent_buffer(eb);
10394
10395         if (header_level != backref_level)
10396                 return -EIO;
10397         return header_level;
10398
10399 release_out:
10400         btrfs_release_path(&path);
10401         return ret;
10402 }
10403
10404 /*
10405  * Check if a tree block backref is valid (points to a valid tree block)
10406  * if level == -1, level will be resolved
10407  * Return >0 for any error found and print error message
10408  */
10409 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10410                                     u64 bytenr, int level)
10411 {
10412         struct btrfs_root *root;
10413         struct btrfs_key key;
10414         struct btrfs_path path;
10415         struct extent_buffer *eb;
10416         struct extent_buffer *node;
10417         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10418         int err = 0;
10419         int ret;
10420
10421         /* Query level for level == -1 special case */
10422         if (level == -1)
10423                 level = query_tree_block_level(fs_info, bytenr);
10424         if (level < 0) {
10425                 err |= REFERENCER_MISSING;
10426                 goto out;
10427         }
10428
10429         key.objectid = root_id;
10430         key.type = BTRFS_ROOT_ITEM_KEY;
10431         key.offset = (u64)-1;
10432
10433         root = btrfs_read_fs_root(fs_info, &key);
10434         if (IS_ERR(root)) {
10435                 err |= REFERENCER_MISSING;
10436                 goto out;
10437         }
10438
10439         /* Read out the tree block to get item/node key */
10440         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10441         if (!extent_buffer_uptodate(eb)) {
10442                 err |= REFERENCER_MISSING;
10443                 free_extent_buffer(eb);
10444                 goto out;
10445         }
10446
10447         /* Empty tree, no need to check key */
10448         if (!btrfs_header_nritems(eb) && !level) {
10449                 free_extent_buffer(eb);
10450                 goto out;
10451         }
10452
10453         if (level)
10454                 btrfs_node_key_to_cpu(eb, &key, 0);
10455         else
10456                 btrfs_item_key_to_cpu(eb, &key, 0);
10457
10458         free_extent_buffer(eb);
10459
10460         btrfs_init_path(&path);
10461         path.lowest_level = level;
10462         /* Search with the first key, to ensure we can reach it */
10463         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10464         if (ret < 0) {
10465                 err |= REFERENCER_MISSING;
10466                 goto release_out;
10467         }
10468
10469         node = path.nodes[level];
10470         if (btrfs_header_bytenr(node) != bytenr) {
10471                 error(
10472         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10473                         bytenr, nodesize, bytenr,
10474                         btrfs_header_bytenr(node));
10475                 err |= REFERENCER_MISMATCH;
10476         }
10477         if (btrfs_header_level(node) != level) {
10478                 error(
10479         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10480                         bytenr, nodesize, level,
10481                         btrfs_header_level(node));
10482                 err |= REFERENCER_MISMATCH;
10483         }
10484
10485 release_out:
10486         btrfs_release_path(&path);
10487 out:
10488         if (err & REFERENCER_MISSING) {
10489                 if (level < 0)
10490                         error("extent [%llu %d] lost referencer (owner: %llu)",
10491                                 bytenr, nodesize, root_id);
10492                 else
10493                         error(
10494                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10495                                 bytenr, nodesize, root_id, level);
10496         }
10497
10498         return err;
10499 }
10500
10501 /*
10502  * Check if tree block @eb is tree reloc root.
10503  * Return 0 if it's not or any problem happens
10504  * Return 1 if it's a tree reloc root
10505  */
10506 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10507                                  struct extent_buffer *eb)
10508 {
10509         struct btrfs_root *tree_reloc_root;
10510         struct btrfs_key key;
10511         u64 bytenr = btrfs_header_bytenr(eb);
10512         u64 owner = btrfs_header_owner(eb);
10513         int ret = 0;
10514
10515         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10516         key.offset = owner;
10517         key.type = BTRFS_ROOT_ITEM_KEY;
10518
10519         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10520         if (IS_ERR(tree_reloc_root))
10521                 return 0;
10522
10523         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10524                 ret = 1;
10525         btrfs_free_fs_root(tree_reloc_root);
10526         return ret;
10527 }
10528
10529 /*
10530  * Check referencer for shared block backref
10531  * If level == -1, this function will resolve the level.
10532  */
10533 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10534                                      u64 parent, u64 bytenr, int level)
10535 {
10536         struct extent_buffer *eb;
10537         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10538         u32 nr;
10539         int found_parent = 0;
10540         int i;
10541
10542         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10543         if (!extent_buffer_uptodate(eb))
10544                 goto out;
10545
10546         if (level == -1)
10547                 level = query_tree_block_level(fs_info, bytenr);
10548         if (level < 0)
10549                 goto out;
10550
10551         /* It's possible it's a tree reloc root */
10552         if (parent == bytenr) {
10553                 if (is_tree_reloc_root(fs_info, eb))
10554                         found_parent = 1;
10555                 goto out;
10556         }
10557
10558         if (level + 1 != btrfs_header_level(eb))
10559                 goto out;
10560
10561         nr = btrfs_header_nritems(eb);
10562         for (i = 0; i < nr; i++) {
10563                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10564                         found_parent = 1;
10565                         break;
10566                 }
10567         }
10568 out:
10569         free_extent_buffer(eb);
10570         if (!found_parent) {
10571                 error(
10572         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10573                         bytenr, nodesize, parent, level);
10574                 return REFERENCER_MISSING;
10575         }
10576         return 0;
10577 }
10578
10579 /*
10580  * Check referencer for normal (inlined) data ref
10581  * If len == 0, it will be resolved by searching in extent tree
10582  */
10583 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10584                                      u64 root_id, u64 objectid, u64 offset,
10585                                      u64 bytenr, u64 len, u32 count)
10586 {
10587         struct btrfs_root *root;
10588         struct btrfs_root *extent_root = fs_info->extent_root;
10589         struct btrfs_key key;
10590         struct btrfs_path path;
10591         struct extent_buffer *leaf;
10592         struct btrfs_file_extent_item *fi;
10593         u32 found_count = 0;
10594         int slot;
10595         int ret = 0;
10596
10597         if (!len) {
10598                 key.objectid = bytenr;
10599                 key.type = BTRFS_EXTENT_ITEM_KEY;
10600                 key.offset = (u64)-1;
10601
10602                 btrfs_init_path(&path);
10603                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10604                 if (ret < 0)
10605                         goto out;
10606                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10607                 if (ret)
10608                         goto out;
10609                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10610                 if (key.objectid != bytenr ||
10611                     key.type != BTRFS_EXTENT_ITEM_KEY)
10612                         goto out;
10613                 len = key.offset;
10614                 btrfs_release_path(&path);
10615         }
10616         key.objectid = root_id;
10617         key.type = BTRFS_ROOT_ITEM_KEY;
10618         key.offset = (u64)-1;
10619         btrfs_init_path(&path);
10620
10621         root = btrfs_read_fs_root(fs_info, &key);
10622         if (IS_ERR(root))
10623                 goto out;
10624
10625         key.objectid = objectid;
10626         key.type = BTRFS_EXTENT_DATA_KEY;
10627         /*
10628          * It can be nasty as data backref offset is
10629          * file offset - file extent offset, which is smaller or
10630          * equal to original backref offset.  The only special case is
10631          * overflow.  So we need to special check and do further search.
10632          */
10633         key.offset = offset & (1ULL << 63) ? 0 : offset;
10634
10635         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10636         if (ret < 0)
10637                 goto out;
10638
10639         /*
10640          * Search afterwards to get correct one
10641          * NOTE: As we must do a comprehensive check on the data backref to
10642          * make sure the dref count also matches, we must iterate all file
10643          * extents for that inode.
10644          */
10645         while (1) {
10646                 leaf = path.nodes[0];
10647                 slot = path.slots[0];
10648
10649                 if (slot >= btrfs_header_nritems(leaf))
10650                         goto next;
10651                 btrfs_item_key_to_cpu(leaf, &key, slot);
10652                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10653                         break;
10654                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10655                 /*
10656                  * Except normal disk bytenr and disk num bytes, we still
10657                  * need to do extra check on dbackref offset as
10658                  * dbackref offset = file_offset - file_extent_offset
10659                  */
10660                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10661                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10662                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10663                     offset)
10664                         found_count++;
10665
10666 next:
10667                 ret = btrfs_next_item(root, &path);
10668                 if (ret)
10669                         break;
10670         }
10671 out:
10672         btrfs_release_path(&path);
10673         if (found_count != count) {
10674                 error(
10675 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10676                         bytenr, len, root_id, objectid, offset, count, found_count);
10677                 return REFERENCER_MISSING;
10678         }
10679         return 0;
10680 }
10681
10682 /*
10683  * Check if the referencer of a shared data backref exists
10684  */
10685 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10686                                      u64 parent, u64 bytenr)
10687 {
10688         struct extent_buffer *eb;
10689         struct btrfs_key key;
10690         struct btrfs_file_extent_item *fi;
10691         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10692         u32 nr;
10693         int found_parent = 0;
10694         int i;
10695
10696         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10697         if (!extent_buffer_uptodate(eb))
10698                 goto out;
10699
10700         nr = btrfs_header_nritems(eb);
10701         for (i = 0; i < nr; i++) {
10702                 btrfs_item_key_to_cpu(eb, &key, i);
10703                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10704                         continue;
10705
10706                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10707                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10708                         continue;
10709
10710                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10711                         found_parent = 1;
10712                         break;
10713                 }
10714         }
10715
10716 out:
10717         free_extent_buffer(eb);
10718         if (!found_parent) {
10719                 error("shared extent %llu referencer lost (parent: %llu)",
10720                         bytenr, parent);
10721                 return REFERENCER_MISSING;
10722         }
10723         return 0;
10724 }
10725
10726 /*
10727  * This function will check a given extent item, including its backref and
10728  * itself (like crossing stripe boundary and type)
10729  *
10730  * Since we don't use extent_record anymore, introduce new error bit
10731  */
10732 static int check_extent_item(struct btrfs_fs_info *fs_info,
10733                              struct extent_buffer *eb, int slot)
10734 {
10735         struct btrfs_extent_item *ei;
10736         struct btrfs_extent_inline_ref *iref;
10737         struct btrfs_extent_data_ref *dref;
10738         unsigned long end;
10739         unsigned long ptr;
10740         int type;
10741         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10742         u32 item_size = btrfs_item_size_nr(eb, slot);
10743         u64 flags;
10744         u64 offset;
10745         int metadata = 0;
10746         int level;
10747         struct btrfs_key key;
10748         int ret;
10749         int err = 0;
10750
10751         btrfs_item_key_to_cpu(eb, &key, slot);
10752         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10753                 bytes_used += key.offset;
10754         else
10755                 bytes_used += nodesize;
10756
10757         if (item_size < sizeof(*ei)) {
10758                 /*
10759                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10760                  * old thing when on disk format is still un-determined.
10761                  * No need to care about it anymore
10762                  */
10763                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10764                 return -ENOTTY;
10765         }
10766
10767         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10768         flags = btrfs_extent_flags(eb, ei);
10769
10770         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10771                 metadata = 1;
10772         if (metadata && check_crossing_stripes(global_info, key.objectid,
10773                                                eb->len)) {
10774                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10775                       key.objectid, key.objectid + nodesize);
10776                 err |= CROSSING_STRIPE_BOUNDARY;
10777         }
10778
10779         ptr = (unsigned long)(ei + 1);
10780
10781         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10782                 /* Old EXTENT_ITEM metadata */
10783                 struct btrfs_tree_block_info *info;
10784
10785                 info = (struct btrfs_tree_block_info *)ptr;
10786                 level = btrfs_tree_block_level(eb, info);
10787                 ptr += sizeof(struct btrfs_tree_block_info);
10788         } else {
10789                 /* New METADATA_ITEM */
10790                 level = key.offset;
10791         }
10792         end = (unsigned long)ei + item_size;
10793
10794 next:
10795         /* Reached extent item end normally */
10796         if (ptr == end)
10797                 goto out;
10798
10799         /* Beyond extent item end, wrong item size */
10800         if (ptr > end) {
10801                 err |= ITEM_SIZE_MISMATCH;
10802                 error("extent item at bytenr %llu slot %d has wrong size",
10803                         eb->start, slot);
10804                 goto out;
10805         }
10806
10807         /* Now check every backref in this extent item */
10808         iref = (struct btrfs_extent_inline_ref *)ptr;
10809         type = btrfs_extent_inline_ref_type(eb, iref);
10810         offset = btrfs_extent_inline_ref_offset(eb, iref);
10811         switch (type) {
10812         case BTRFS_TREE_BLOCK_REF_KEY:
10813                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10814                                                level);
10815                 err |= ret;
10816                 break;
10817         case BTRFS_SHARED_BLOCK_REF_KEY:
10818                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10819                                                  level);
10820                 err |= ret;
10821                 break;
10822         case BTRFS_EXTENT_DATA_REF_KEY:
10823                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10824                 ret = check_extent_data_backref(fs_info,
10825                                 btrfs_extent_data_ref_root(eb, dref),
10826                                 btrfs_extent_data_ref_objectid(eb, dref),
10827                                 btrfs_extent_data_ref_offset(eb, dref),
10828                                 key.objectid, key.offset,
10829                                 btrfs_extent_data_ref_count(eb, dref));
10830                 err |= ret;
10831                 break;
10832         case BTRFS_SHARED_DATA_REF_KEY:
10833                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10834                 err |= ret;
10835                 break;
10836         default:
10837                 error("extent[%llu %d %llu] has unknown ref type: %d",
10838                         key.objectid, key.type, key.offset, type);
10839                 err |= UNKNOWN_TYPE;
10840                 goto out;
10841         }
10842
10843         ptr += btrfs_extent_inline_ref_size(type);
10844         goto next;
10845
10846 out:
10847         return err;
10848 }
10849
10850 /*
10851  * Check if a dev extent item is referred correctly by its chunk
10852  */
10853 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10854                                  struct extent_buffer *eb, int slot)
10855 {
10856         struct btrfs_root *chunk_root = fs_info->chunk_root;
10857         struct btrfs_dev_extent *ptr;
10858         struct btrfs_path path;
10859         struct btrfs_key chunk_key;
10860         struct btrfs_key devext_key;
10861         struct btrfs_chunk *chunk;
10862         struct extent_buffer *l;
10863         int num_stripes;
10864         u64 length;
10865         int i;
10866         int found_chunk = 0;
10867         int ret;
10868
10869         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10870         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10871         length = btrfs_dev_extent_length(eb, ptr);
10872
10873         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10874         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10875         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10876
10877         btrfs_init_path(&path);
10878         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10879         if (ret)
10880                 goto out;
10881
10882         l = path.nodes[0];
10883         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10884         if (btrfs_chunk_length(l, chunk) != length)
10885                 goto out;
10886
10887         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10888         for (i = 0; i < num_stripes; i++) {
10889                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10890                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10891
10892                 if (devid == devext_key.objectid &&
10893                     offset == devext_key.offset) {
10894                         found_chunk = 1;
10895                         break;
10896                 }
10897         }
10898 out:
10899         btrfs_release_path(&path);
10900         if (!found_chunk) {
10901                 error(
10902                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10903                         devext_key.objectid, devext_key.offset, length);
10904                 return REFERENCER_MISSING;
10905         }
10906         return 0;
10907 }
10908
10909 /*
10910  * Check if the used space is correct with the dev item
10911  */
10912 static int check_dev_item(struct btrfs_fs_info *fs_info,
10913                           struct extent_buffer *eb, int slot)
10914 {
10915         struct btrfs_root *dev_root = fs_info->dev_root;
10916         struct btrfs_dev_item *dev_item;
10917         struct btrfs_path path;
10918         struct btrfs_key key;
10919         struct btrfs_dev_extent *ptr;
10920         u64 dev_id;
10921         u64 used;
10922         u64 total = 0;
10923         int ret;
10924
10925         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10926         dev_id = btrfs_device_id(eb, dev_item);
10927         used = btrfs_device_bytes_used(eb, dev_item);
10928
10929         key.objectid = dev_id;
10930         key.type = BTRFS_DEV_EXTENT_KEY;
10931         key.offset = 0;
10932
10933         btrfs_init_path(&path);
10934         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10935         if (ret < 0) {
10936                 btrfs_item_key_to_cpu(eb, &key, slot);
10937                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10938                         key.objectid, key.type, key.offset);
10939                 btrfs_release_path(&path);
10940                 return REFERENCER_MISSING;
10941         }
10942
10943         /* Iterate dev_extents to calculate the used space of a device */
10944         while (1) {
10945                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10946                         goto next;
10947
10948                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10949                 if (key.objectid > dev_id)
10950                         break;
10951                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10952                         goto next;
10953
10954                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10955                                      struct btrfs_dev_extent);
10956                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10957 next:
10958                 ret = btrfs_next_item(dev_root, &path);
10959                 if (ret)
10960                         break;
10961         }
10962         btrfs_release_path(&path);
10963
10964         if (used != total) {
10965                 btrfs_item_key_to_cpu(eb, &key, slot);
10966                 error(
10967 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10968                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10969                         BTRFS_DEV_EXTENT_KEY, dev_id);
10970                 return ACCOUNTING_MISMATCH;
10971         }
10972         return 0;
10973 }
10974
10975 /*
10976  * Check a block group item with its referener (chunk) and its used space
10977  * with extent/metadata item
10978  */
10979 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10980                                   struct extent_buffer *eb, int slot)
10981 {
10982         struct btrfs_root *extent_root = fs_info->extent_root;
10983         struct btrfs_root *chunk_root = fs_info->chunk_root;
10984         struct btrfs_block_group_item *bi;
10985         struct btrfs_block_group_item bg_item;
10986         struct btrfs_path path;
10987         struct btrfs_key bg_key;
10988         struct btrfs_key chunk_key;
10989         struct btrfs_key extent_key;
10990         struct btrfs_chunk *chunk;
10991         struct extent_buffer *leaf;
10992         struct btrfs_extent_item *ei;
10993         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10994         u64 flags;
10995         u64 bg_flags;
10996         u64 used;
10997         u64 total = 0;
10998         int ret;
10999         int err = 0;
11000
11001         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11002         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11003         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11004         used = btrfs_block_group_used(&bg_item);
11005         bg_flags = btrfs_block_group_flags(&bg_item);
11006
11007         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11008         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11009         chunk_key.offset = bg_key.objectid;
11010
11011         btrfs_init_path(&path);
11012         /* Search for the referencer chunk */
11013         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11014         if (ret) {
11015                 error(
11016                 "block group[%llu %llu] did not find the related chunk item",
11017                         bg_key.objectid, bg_key.offset);
11018                 err |= REFERENCER_MISSING;
11019         } else {
11020                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11021                                         struct btrfs_chunk);
11022                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11023                                                 bg_key.offset) {
11024                         error(
11025         "block group[%llu %llu] related chunk item length does not match",
11026                                 bg_key.objectid, bg_key.offset);
11027                         err |= REFERENCER_MISMATCH;
11028                 }
11029         }
11030         btrfs_release_path(&path);
11031
11032         /* Search from the block group bytenr */
11033         extent_key.objectid = bg_key.objectid;
11034         extent_key.type = 0;
11035         extent_key.offset = 0;
11036
11037         btrfs_init_path(&path);
11038         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11039         if (ret < 0)
11040                 goto out;
11041
11042         /* Iterate extent tree to account used space */
11043         while (1) {
11044                 leaf = path.nodes[0];
11045
11046                 /* Search slot can point to the last item beyond leaf nritems */
11047                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11048                         goto next;
11049
11050                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11051                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11052                         break;
11053
11054                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11055                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11056                         goto next;
11057                 if (extent_key.objectid < bg_key.objectid)
11058                         goto next;
11059
11060                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11061                         total += nodesize;
11062                 else
11063                         total += extent_key.offset;
11064
11065                 ei = btrfs_item_ptr(leaf, path.slots[0],
11066                                     struct btrfs_extent_item);
11067                 flags = btrfs_extent_flags(leaf, ei);
11068                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11069                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11070                                 error(
11071                         "bad extent[%llu, %llu) type mismatch with chunk",
11072                                         extent_key.objectid,
11073                                         extent_key.objectid + extent_key.offset);
11074                                 err |= CHUNK_TYPE_MISMATCH;
11075                         }
11076                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11077                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11078                                     BTRFS_BLOCK_GROUP_METADATA))) {
11079                                 error(
11080                         "bad extent[%llu, %llu) type mismatch with chunk",
11081                                         extent_key.objectid,
11082                                         extent_key.objectid + nodesize);
11083                                 err |= CHUNK_TYPE_MISMATCH;
11084                         }
11085                 }
11086 next:
11087                 ret = btrfs_next_item(extent_root, &path);
11088                 if (ret)
11089                         break;
11090         }
11091
11092 out:
11093         btrfs_release_path(&path);
11094
11095         if (total != used) {
11096                 error(
11097                 "block group[%llu %llu] used %llu but extent items used %llu",
11098                         bg_key.objectid, bg_key.offset, used, total);
11099                 err |= ACCOUNTING_MISMATCH;
11100         }
11101         return err;
11102 }
11103
11104 /*
11105  * Check a chunk item.
11106  * Including checking all referred dev_extents and block group
11107  */
11108 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11109                             struct extent_buffer *eb, int slot)
11110 {
11111         struct btrfs_root *extent_root = fs_info->extent_root;
11112         struct btrfs_root *dev_root = fs_info->dev_root;
11113         struct btrfs_path path;
11114         struct btrfs_key chunk_key;
11115         struct btrfs_key bg_key;
11116         struct btrfs_key devext_key;
11117         struct btrfs_chunk *chunk;
11118         struct extent_buffer *leaf;
11119         struct btrfs_block_group_item *bi;
11120         struct btrfs_block_group_item bg_item;
11121         struct btrfs_dev_extent *ptr;
11122         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11123         u64 length;
11124         u64 chunk_end;
11125         u64 type;
11126         u64 profile;
11127         int num_stripes;
11128         u64 offset;
11129         u64 objectid;
11130         int i;
11131         int ret;
11132         int err = 0;
11133
11134         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11135         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11136         length = btrfs_chunk_length(eb, chunk);
11137         chunk_end = chunk_key.offset + length;
11138         if (!IS_ALIGNED(length, sectorsize)) {
11139                 error("chunk[%llu %llu) not aligned to %u",
11140                         chunk_key.offset, chunk_end, sectorsize);
11141                 err |= BYTES_UNALIGNED;
11142                 goto out;
11143         }
11144
11145         type = btrfs_chunk_type(eb, chunk);
11146         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11147         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11148                 error("chunk[%llu %llu) has no chunk type",
11149                         chunk_key.offset, chunk_end);
11150                 err |= UNKNOWN_TYPE;
11151         }
11152         if (profile && (profile & (profile - 1))) {
11153                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11154                         chunk_key.offset, chunk_end, profile);
11155                 err |= UNKNOWN_TYPE;
11156         }
11157
11158         bg_key.objectid = chunk_key.offset;
11159         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11160         bg_key.offset = length;
11161
11162         btrfs_init_path(&path);
11163         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11164         if (ret) {
11165                 error(
11166                 "chunk[%llu %llu) did not find the related block group item",
11167                         chunk_key.offset, chunk_end);
11168                 err |= REFERENCER_MISSING;
11169         } else{
11170                 leaf = path.nodes[0];
11171                 bi = btrfs_item_ptr(leaf, path.slots[0],
11172                                     struct btrfs_block_group_item);
11173                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11174                                    sizeof(bg_item));
11175                 if (btrfs_block_group_flags(&bg_item) != type) {
11176                         error(
11177 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11178                                 chunk_key.offset, chunk_end, type,
11179                                 btrfs_block_group_flags(&bg_item));
11180                         err |= REFERENCER_MISSING;
11181                 }
11182         }
11183
11184         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11185         for (i = 0; i < num_stripes; i++) {
11186                 btrfs_release_path(&path);
11187                 btrfs_init_path(&path);
11188                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11189                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11190                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11191
11192                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11193                                         0, 0);
11194                 if (ret)
11195                         goto not_match_dev;
11196
11197                 leaf = path.nodes[0];
11198                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11199                                      struct btrfs_dev_extent);
11200                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11201                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11202                 if (objectid != chunk_key.objectid ||
11203                     offset != chunk_key.offset ||
11204                     btrfs_dev_extent_length(leaf, ptr) != length)
11205                         goto not_match_dev;
11206                 continue;
11207 not_match_dev:
11208                 err |= BACKREF_MISSING;
11209                 error(
11210                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11211                         chunk_key.objectid, chunk_end, i);
11212                 continue;
11213         }
11214         btrfs_release_path(&path);
11215 out:
11216         return err;
11217 }
11218
11219 /*
11220  * Main entry function to check known items and update related accounting info
11221  */
11222 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11223 {
11224         struct btrfs_fs_info *fs_info = root->fs_info;
11225         struct btrfs_key key;
11226         int slot = 0;
11227         int type;
11228         struct btrfs_extent_data_ref *dref;
11229         int ret;
11230         int err = 0;
11231
11232 next:
11233         btrfs_item_key_to_cpu(eb, &key, slot);
11234         type = key.type;
11235
11236         switch (type) {
11237         case BTRFS_EXTENT_DATA_KEY:
11238                 ret = check_extent_data_item(root, eb, slot);
11239                 err |= ret;
11240                 break;
11241         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11242                 ret = check_block_group_item(fs_info, eb, slot);
11243                 err |= ret;
11244                 break;
11245         case BTRFS_DEV_ITEM_KEY:
11246                 ret = check_dev_item(fs_info, eb, slot);
11247                 err |= ret;
11248                 break;
11249         case BTRFS_CHUNK_ITEM_KEY:
11250                 ret = check_chunk_item(fs_info, eb, slot);
11251                 err |= ret;
11252                 break;
11253         case BTRFS_DEV_EXTENT_KEY:
11254                 ret = check_dev_extent_item(fs_info, eb, slot);
11255                 err |= ret;
11256                 break;
11257         case BTRFS_EXTENT_ITEM_KEY:
11258         case BTRFS_METADATA_ITEM_KEY:
11259                 ret = check_extent_item(fs_info, eb, slot);
11260                 err |= ret;
11261                 break;
11262         case BTRFS_EXTENT_CSUM_KEY:
11263                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11264                 break;
11265         case BTRFS_TREE_BLOCK_REF_KEY:
11266                 ret = check_tree_block_backref(fs_info, key.offset,
11267                                                key.objectid, -1);
11268                 err |= ret;
11269                 break;
11270         case BTRFS_EXTENT_DATA_REF_KEY:
11271                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11272                 ret = check_extent_data_backref(fs_info,
11273                                 btrfs_extent_data_ref_root(eb, dref),
11274                                 btrfs_extent_data_ref_objectid(eb, dref),
11275                                 btrfs_extent_data_ref_offset(eb, dref),
11276                                 key.objectid, 0,
11277                                 btrfs_extent_data_ref_count(eb, dref));
11278                 err |= ret;
11279                 break;
11280         case BTRFS_SHARED_BLOCK_REF_KEY:
11281                 ret = check_shared_block_backref(fs_info, key.offset,
11282                                                  key.objectid, -1);
11283                 err |= ret;
11284                 break;
11285         case BTRFS_SHARED_DATA_REF_KEY:
11286                 ret = check_shared_data_backref(fs_info, key.offset,
11287                                                 key.objectid);
11288                 err |= ret;
11289                 break;
11290         default:
11291                 break;
11292         }
11293
11294         if (++slot < btrfs_header_nritems(eb))
11295                 goto next;
11296
11297         return err;
11298 }
11299
11300 /*
11301  * Helper function for later fs/subvol tree check.  To determine if a tree
11302  * block should be checked.
11303  * This function will ensure only the direct referencer with lowest rootid to
11304  * check a fs/subvolume tree block.
11305  *
11306  * Backref check at extent tree would detect errors like missing subvolume
11307  * tree, so we can do aggressive check to reduce duplicated checks.
11308  */
11309 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11310 {
11311         struct btrfs_root *extent_root = root->fs_info->extent_root;
11312         struct btrfs_key key;
11313         struct btrfs_path path;
11314         struct extent_buffer *leaf;
11315         int slot;
11316         struct btrfs_extent_item *ei;
11317         unsigned long ptr;
11318         unsigned long end;
11319         int type;
11320         u32 item_size;
11321         u64 offset;
11322         struct btrfs_extent_inline_ref *iref;
11323         int ret;
11324
11325         btrfs_init_path(&path);
11326         key.objectid = btrfs_header_bytenr(eb);
11327         key.type = BTRFS_METADATA_ITEM_KEY;
11328         key.offset = (u64)-1;
11329
11330         /*
11331          * Any failure in backref resolving means we can't determine
11332          * whom the tree block belongs to.
11333          * So in that case, we need to check that tree block
11334          */
11335         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11336         if (ret < 0)
11337                 goto need_check;
11338
11339         ret = btrfs_previous_extent_item(extent_root, &path,
11340                                          btrfs_header_bytenr(eb));
11341         if (ret)
11342                 goto need_check;
11343
11344         leaf = path.nodes[0];
11345         slot = path.slots[0];
11346         btrfs_item_key_to_cpu(leaf, &key, slot);
11347         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11348
11349         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11350                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11351         } else {
11352                 struct btrfs_tree_block_info *info;
11353
11354                 info = (struct btrfs_tree_block_info *)(ei + 1);
11355                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11356         }
11357
11358         item_size = btrfs_item_size_nr(leaf, slot);
11359         ptr = (unsigned long)iref;
11360         end = (unsigned long)ei + item_size;
11361         while (ptr < end) {
11362                 iref = (struct btrfs_extent_inline_ref *)ptr;
11363                 type = btrfs_extent_inline_ref_type(leaf, iref);
11364                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11365
11366                 /*
11367                  * We only check the tree block if current root is
11368                  * the lowest referencer of it.
11369                  */
11370                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11371                     offset < root->objectid) {
11372                         btrfs_release_path(&path);
11373                         return 0;
11374                 }
11375
11376                 ptr += btrfs_extent_inline_ref_size(type);
11377         }
11378         /*
11379          * Normally we should also check keyed tree block ref, but that may be
11380          * very time consuming.  Inlined ref should already make us skip a lot
11381          * of refs now.  So skip search keyed tree block ref.
11382          */
11383
11384 need_check:
11385         btrfs_release_path(&path);
11386         return 1;
11387 }
11388
11389 /*
11390  * Traversal function for tree block. We will do:
11391  * 1) Skip shared fs/subvolume tree blocks
11392  * 2) Update related bytes accounting
11393  * 3) Pre-order traversal
11394  */
11395 static int traverse_tree_block(struct btrfs_root *root,
11396                                 struct extent_buffer *node)
11397 {
11398         struct extent_buffer *eb;
11399         struct btrfs_key key;
11400         struct btrfs_key drop_key;
11401         int level;
11402         u64 nr;
11403         int i;
11404         int err = 0;
11405         int ret;
11406
11407         /*
11408          * Skip shared fs/subvolume tree block, in that case they will
11409          * be checked by referencer with lowest rootid
11410          */
11411         if (is_fstree(root->objectid) && !should_check(root, node))
11412                 return 0;
11413
11414         /* Update bytes accounting */
11415         total_btree_bytes += node->len;
11416         if (fs_root_objectid(btrfs_header_owner(node)))
11417                 total_fs_tree_bytes += node->len;
11418         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11419                 total_extent_tree_bytes += node->len;
11420         if (!found_old_backref &&
11421             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11422             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11423             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11424                 found_old_backref = 1;
11425
11426         /* pre-order tranversal, check itself first */
11427         level = btrfs_header_level(node);
11428         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11429                                    btrfs_header_level(node),
11430                                    btrfs_header_owner(node));
11431         err |= ret;
11432         if (err)
11433                 error(
11434         "check %s failed root %llu bytenr %llu level %d, force continue check",
11435                         level ? "node":"leaf", root->objectid,
11436                         btrfs_header_bytenr(node), btrfs_header_level(node));
11437
11438         if (!level) {
11439                 btree_space_waste += btrfs_leaf_free_space(root, node);
11440                 ret = check_leaf_items(root, node);
11441                 err |= ret;
11442                 return err;
11443         }
11444
11445         nr = btrfs_header_nritems(node);
11446         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11447         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11448                 sizeof(struct btrfs_key_ptr);
11449
11450         /* Then check all its children */
11451         for (i = 0; i < nr; i++) {
11452                 u64 blocknr = btrfs_node_blockptr(node, i);
11453
11454                 btrfs_node_key_to_cpu(node, &key, i);
11455                 if (level == root->root_item.drop_level &&
11456                     is_dropped_key(&key, &drop_key))
11457                         continue;
11458
11459                 /*
11460                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11461                  * to call the function itself.
11462                  */
11463                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11464                 if (extent_buffer_uptodate(eb)) {
11465                         ret = traverse_tree_block(root, eb);
11466                         err |= ret;
11467                 }
11468                 free_extent_buffer(eb);
11469         }
11470
11471         return err;
11472 }
11473
11474 /*
11475  * Low memory usage version check_chunks_and_extents.
11476  */
11477 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11478 {
11479         struct btrfs_path path;
11480         struct btrfs_key key;
11481         struct btrfs_root *root1;
11482         struct btrfs_root *cur_root;
11483         int err = 0;
11484         int ret;
11485
11486         root1 = root->fs_info->chunk_root;
11487         ret = traverse_tree_block(root1, root1->node);
11488         err |= ret;
11489
11490         root1 = root->fs_info->tree_root;
11491         ret = traverse_tree_block(root1, root1->node);
11492         err |= ret;
11493
11494         btrfs_init_path(&path);
11495         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11496         key.offset = 0;
11497         key.type = BTRFS_ROOT_ITEM_KEY;
11498
11499         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11500         if (ret) {
11501                 error("cannot find extent treet in tree_root");
11502                 goto out;
11503         }
11504
11505         while (1) {
11506                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11507                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11508                         goto next;
11509                 key.offset = (u64)-1;
11510
11511                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11512                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11513                                         &key);
11514                 else
11515                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11516                 if (IS_ERR(cur_root) || !cur_root) {
11517                         error("failed to read tree: %lld", key.objectid);
11518                         goto next;
11519                 }
11520
11521                 ret = traverse_tree_block(cur_root, cur_root->node);
11522                 err |= ret;
11523
11524                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11525                         btrfs_free_fs_root(cur_root);
11526 next:
11527                 ret = btrfs_next_item(root1, &path);
11528                 if (ret)
11529                         goto out;
11530         }
11531
11532 out:
11533         btrfs_release_path(&path);
11534         return err;
11535 }
11536
11537 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11538                            struct btrfs_root *root, int overwrite)
11539 {
11540         struct extent_buffer *c;
11541         struct extent_buffer *old = root->node;
11542         int level;
11543         int ret;
11544         struct btrfs_disk_key disk_key = {0,0,0};
11545
11546         level = 0;
11547
11548         if (overwrite) {
11549                 c = old;
11550                 extent_buffer_get(c);
11551                 goto init;
11552         }
11553         c = btrfs_alloc_free_block(trans, root,
11554                                    root->nodesize,
11555                                    root->root_key.objectid,
11556                                    &disk_key, level, 0, 0);
11557         if (IS_ERR(c)) {
11558                 c = old;
11559                 extent_buffer_get(c);
11560                 overwrite = 1;
11561         }
11562 init:
11563         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11564         btrfs_set_header_level(c, level);
11565         btrfs_set_header_bytenr(c, c->start);
11566         btrfs_set_header_generation(c, trans->transid);
11567         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11568         btrfs_set_header_owner(c, root->root_key.objectid);
11569
11570         write_extent_buffer(c, root->fs_info->fsid,
11571                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11572
11573         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11574                             btrfs_header_chunk_tree_uuid(c),
11575                             BTRFS_UUID_SIZE);
11576
11577         btrfs_mark_buffer_dirty(c);
11578         /*
11579          * this case can happen in the following case:
11580          *
11581          * 1.overwrite previous root.
11582          *
11583          * 2.reinit reloc data root, this is because we skip pin
11584          * down reloc data tree before which means we can allocate
11585          * same block bytenr here.
11586          */
11587         if (old->start == c->start) {
11588                 btrfs_set_root_generation(&root->root_item,
11589                                           trans->transid);
11590                 root->root_item.level = btrfs_header_level(root->node);
11591                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11592                                         &root->root_key, &root->root_item);
11593                 if (ret) {
11594                         free_extent_buffer(c);
11595                         return ret;
11596                 }
11597         }
11598         free_extent_buffer(old);
11599         root->node = c;
11600         add_root_to_dirty_list(root);
11601         return 0;
11602 }
11603
11604 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11605                                 struct extent_buffer *eb, int tree_root)
11606 {
11607         struct extent_buffer *tmp;
11608         struct btrfs_root_item *ri;
11609         struct btrfs_key key;
11610         u64 bytenr;
11611         u32 nodesize;
11612         int level = btrfs_header_level(eb);
11613         int nritems;
11614         int ret;
11615         int i;
11616
11617         /*
11618          * If we have pinned this block before, don't pin it again.
11619          * This can not only avoid forever loop with broken filesystem
11620          * but also give us some speedups.
11621          */
11622         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11623                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11624                 return 0;
11625
11626         btrfs_pin_extent(fs_info, eb->start, eb->len);
11627
11628         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11629         nritems = btrfs_header_nritems(eb);
11630         for (i = 0; i < nritems; i++) {
11631                 if (level == 0) {
11632                         btrfs_item_key_to_cpu(eb, &key, i);
11633                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11634                                 continue;
11635                         /* Skip the extent root and reloc roots */
11636                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11637                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11638                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11639                                 continue;
11640                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11641                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11642
11643                         /*
11644                          * If at any point we start needing the real root we
11645                          * will have to build a stump root for the root we are
11646                          * in, but for now this doesn't actually use the root so
11647                          * just pass in extent_root.
11648                          */
11649                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11650                                               nodesize, 0);
11651                         if (!extent_buffer_uptodate(tmp)) {
11652                                 fprintf(stderr, "Error reading root block\n");
11653                                 return -EIO;
11654                         }
11655                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11656                         free_extent_buffer(tmp);
11657                         if (ret)
11658                                 return ret;
11659                 } else {
11660                         bytenr = btrfs_node_blockptr(eb, i);
11661
11662                         /* If we aren't the tree root don't read the block */
11663                         if (level == 1 && !tree_root) {
11664                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11665                                 continue;
11666                         }
11667
11668                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11669                                               nodesize, 0);
11670                         if (!extent_buffer_uptodate(tmp)) {
11671                                 fprintf(stderr, "Error reading tree block\n");
11672                                 return -EIO;
11673                         }
11674                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11675                         free_extent_buffer(tmp);
11676                         if (ret)
11677                                 return ret;
11678                 }
11679         }
11680
11681         return 0;
11682 }
11683
11684 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11685 {
11686         int ret;
11687
11688         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11689         if (ret)
11690                 return ret;
11691
11692         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11693 }
11694
11695 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11696 {
11697         struct btrfs_block_group_cache *cache;
11698         struct btrfs_path path;
11699         struct extent_buffer *leaf;
11700         struct btrfs_chunk *chunk;
11701         struct btrfs_key key;
11702         int ret;
11703         u64 start;
11704
11705         btrfs_init_path(&path);
11706         key.objectid = 0;
11707         key.type = BTRFS_CHUNK_ITEM_KEY;
11708         key.offset = 0;
11709         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11710         if (ret < 0) {
11711                 btrfs_release_path(&path);
11712                 return ret;
11713         }
11714
11715         /*
11716          * We do this in case the block groups were screwed up and had alloc
11717          * bits that aren't actually set on the chunks.  This happens with
11718          * restored images every time and could happen in real life I guess.
11719          */
11720         fs_info->avail_data_alloc_bits = 0;
11721         fs_info->avail_metadata_alloc_bits = 0;
11722         fs_info->avail_system_alloc_bits = 0;
11723
11724         /* First we need to create the in-memory block groups */
11725         while (1) {
11726                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11727                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11728                         if (ret < 0) {
11729                                 btrfs_release_path(&path);
11730                                 return ret;
11731                         }
11732                         if (ret) {
11733                                 ret = 0;
11734                                 break;
11735                         }
11736                 }
11737                 leaf = path.nodes[0];
11738                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11739                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11740                         path.slots[0]++;
11741                         continue;
11742                 }
11743
11744                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11745                 btrfs_add_block_group(fs_info, 0,
11746                                       btrfs_chunk_type(leaf, chunk),
11747                                       key.objectid, key.offset,
11748                                       btrfs_chunk_length(leaf, chunk));
11749                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11750                                  key.offset + btrfs_chunk_length(leaf, chunk));
11751                 path.slots[0]++;
11752         }
11753         start = 0;
11754         while (1) {
11755                 cache = btrfs_lookup_first_block_group(fs_info, start);
11756                 if (!cache)
11757                         break;
11758                 cache->cached = 1;
11759                 start = cache->key.objectid + cache->key.offset;
11760         }
11761
11762         btrfs_release_path(&path);
11763         return 0;
11764 }
11765
11766 static int reset_balance(struct btrfs_trans_handle *trans,
11767                          struct btrfs_fs_info *fs_info)
11768 {
11769         struct btrfs_root *root = fs_info->tree_root;
11770         struct btrfs_path path;
11771         struct extent_buffer *leaf;
11772         struct btrfs_key key;
11773         int del_slot, del_nr = 0;
11774         int ret;
11775         int found = 0;
11776
11777         btrfs_init_path(&path);
11778         key.objectid = BTRFS_BALANCE_OBJECTID;
11779         key.type = BTRFS_BALANCE_ITEM_KEY;
11780         key.offset = 0;
11781         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11782         if (ret) {
11783                 if (ret > 0)
11784                         ret = 0;
11785                 if (!ret)
11786                         goto reinit_data_reloc;
11787                 else
11788                         goto out;
11789         }
11790
11791         ret = btrfs_del_item(trans, root, &path);
11792         if (ret)
11793                 goto out;
11794         btrfs_release_path(&path);
11795
11796         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11797         key.type = BTRFS_ROOT_ITEM_KEY;
11798         key.offset = 0;
11799         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11800         if (ret < 0)
11801                 goto out;
11802         while (1) {
11803                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11804                         if (!found)
11805                                 break;
11806
11807                         if (del_nr) {
11808                                 ret = btrfs_del_items(trans, root, &path,
11809                                                       del_slot, del_nr);
11810                                 del_nr = 0;
11811                                 if (ret)
11812                                         goto out;
11813                         }
11814                         key.offset++;
11815                         btrfs_release_path(&path);
11816
11817                         found = 0;
11818                         ret = btrfs_search_slot(trans, root, &key, &path,
11819                                                 -1, 1);
11820                         if (ret < 0)
11821                                 goto out;
11822                         continue;
11823                 }
11824                 found = 1;
11825                 leaf = path.nodes[0];
11826                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11827                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11828                         break;
11829                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11830                         path.slots[0]++;
11831                         continue;
11832                 }
11833                 if (!del_nr) {
11834                         del_slot = path.slots[0];
11835                         del_nr = 1;
11836                 } else {
11837                         del_nr++;
11838                 }
11839                 path.slots[0]++;
11840         }
11841
11842         if (del_nr) {
11843                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11844                 if (ret)
11845                         goto out;
11846         }
11847         btrfs_release_path(&path);
11848
11849 reinit_data_reloc:
11850         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11851         key.type = BTRFS_ROOT_ITEM_KEY;
11852         key.offset = (u64)-1;
11853         root = btrfs_read_fs_root(fs_info, &key);
11854         if (IS_ERR(root)) {
11855                 fprintf(stderr, "Error reading data reloc tree\n");
11856                 ret = PTR_ERR(root);
11857                 goto out;
11858         }
11859         record_root_in_trans(trans, root);
11860         ret = btrfs_fsck_reinit_root(trans, root, 0);
11861         if (ret)
11862                 goto out;
11863         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11864 out:
11865         btrfs_release_path(&path);
11866         return ret;
11867 }
11868
11869 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11870                               struct btrfs_fs_info *fs_info)
11871 {
11872         u64 start = 0;
11873         int ret;
11874
11875         /*
11876          * The only reason we don't do this is because right now we're just
11877          * walking the trees we find and pinning down their bytes, we don't look
11878          * at any of the leaves.  In order to do mixed groups we'd have to check
11879          * the leaves of any fs roots and pin down the bytes for any file
11880          * extents we find.  Not hard but why do it if we don't have to?
11881          */
11882         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11883                 fprintf(stderr, "We don't support re-initing the extent tree "
11884                         "for mixed block groups yet, please notify a btrfs "
11885                         "developer you want to do this so they can add this "
11886                         "functionality.\n");
11887                 return -EINVAL;
11888         }
11889
11890         /*
11891          * first we need to walk all of the trees except the extent tree and pin
11892          * down the bytes that are in use so we don't overwrite any existing
11893          * metadata.
11894          */
11895         ret = pin_metadata_blocks(fs_info);
11896         if (ret) {
11897                 fprintf(stderr, "error pinning down used bytes\n");
11898                 return ret;
11899         }
11900
11901         /*
11902          * Need to drop all the block groups since we're going to recreate all
11903          * of them again.
11904          */
11905         btrfs_free_block_groups(fs_info);
11906         ret = reset_block_groups(fs_info);
11907         if (ret) {
11908                 fprintf(stderr, "error resetting the block groups\n");
11909                 return ret;
11910         }
11911
11912         /* Ok we can allocate now, reinit the extent root */
11913         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11914         if (ret) {
11915                 fprintf(stderr, "extent root initialization failed\n");
11916                 /*
11917                  * When the transaction code is updated we should end the
11918                  * transaction, but for now progs only knows about commit so
11919                  * just return an error.
11920                  */
11921                 return ret;
11922         }
11923
11924         /*
11925          * Now we have all the in-memory block groups setup so we can make
11926          * allocations properly, and the metadata we care about is safe since we
11927          * pinned all of it above.
11928          */
11929         while (1) {
11930                 struct btrfs_block_group_cache *cache;
11931
11932                 cache = btrfs_lookup_first_block_group(fs_info, start);
11933                 if (!cache)
11934                         break;
11935                 start = cache->key.objectid + cache->key.offset;
11936                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11937                                         &cache->key, &cache->item,
11938                                         sizeof(cache->item));
11939                 if (ret) {
11940                         fprintf(stderr, "Error adding block group\n");
11941                         return ret;
11942                 }
11943                 btrfs_extent_post_op(trans, fs_info->extent_root);
11944         }
11945
11946         ret = reset_balance(trans, fs_info);
11947         if (ret)
11948                 fprintf(stderr, "error resetting the pending balance\n");
11949
11950         return ret;
11951 }
11952
11953 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11954 {
11955         struct btrfs_path path;
11956         struct btrfs_trans_handle *trans;
11957         struct btrfs_key key;
11958         int ret;
11959
11960         printf("Recowing metadata block %llu\n", eb->start);
11961         key.objectid = btrfs_header_owner(eb);
11962         key.type = BTRFS_ROOT_ITEM_KEY;
11963         key.offset = (u64)-1;
11964
11965         root = btrfs_read_fs_root(root->fs_info, &key);
11966         if (IS_ERR(root)) {
11967                 fprintf(stderr, "Couldn't find owner root %llu\n",
11968                         key.objectid);
11969                 return PTR_ERR(root);
11970         }
11971
11972         trans = btrfs_start_transaction(root, 1);
11973         if (IS_ERR(trans))
11974                 return PTR_ERR(trans);
11975
11976         btrfs_init_path(&path);
11977         path.lowest_level = btrfs_header_level(eb);
11978         if (path.lowest_level)
11979                 btrfs_node_key_to_cpu(eb, &key, 0);
11980         else
11981                 btrfs_item_key_to_cpu(eb, &key, 0);
11982
11983         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11984         btrfs_commit_transaction(trans, root);
11985         btrfs_release_path(&path);
11986         return ret;
11987 }
11988
11989 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11990 {
11991         struct btrfs_path path;
11992         struct btrfs_trans_handle *trans;
11993         struct btrfs_key key;
11994         int ret;
11995
11996         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11997                bad->key.type, bad->key.offset);
11998         key.objectid = bad->root_id;
11999         key.type = BTRFS_ROOT_ITEM_KEY;
12000         key.offset = (u64)-1;
12001
12002         root = btrfs_read_fs_root(root->fs_info, &key);
12003         if (IS_ERR(root)) {
12004                 fprintf(stderr, "Couldn't find owner root %llu\n",
12005                         key.objectid);
12006                 return PTR_ERR(root);
12007         }
12008
12009         trans = btrfs_start_transaction(root, 1);
12010         if (IS_ERR(trans))
12011                 return PTR_ERR(trans);
12012
12013         btrfs_init_path(&path);
12014         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12015         if (ret) {
12016                 if (ret > 0)
12017                         ret = 0;
12018                 goto out;
12019         }
12020         ret = btrfs_del_item(trans, root, &path);
12021 out:
12022         btrfs_commit_transaction(trans, root);
12023         btrfs_release_path(&path);
12024         return ret;
12025 }
12026
12027 static int zero_log_tree(struct btrfs_root *root)
12028 {
12029         struct btrfs_trans_handle *trans;
12030         int ret;
12031
12032         trans = btrfs_start_transaction(root, 1);
12033         if (IS_ERR(trans)) {
12034                 ret = PTR_ERR(trans);
12035                 return ret;
12036         }
12037         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12038         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12039         ret = btrfs_commit_transaction(trans, root);
12040         return ret;
12041 }
12042
12043 static int populate_csum(struct btrfs_trans_handle *trans,
12044                          struct btrfs_root *csum_root, char *buf, u64 start,
12045                          u64 len)
12046 {
12047         u64 offset = 0;
12048         u64 sectorsize;
12049         int ret = 0;
12050
12051         while (offset < len) {
12052                 sectorsize = csum_root->sectorsize;
12053                 ret = read_extent_data(csum_root, buf, start + offset,
12054                                        &sectorsize, 0);
12055                 if (ret)
12056                         break;
12057                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12058                                             start + offset, buf, sectorsize);
12059                 if (ret)
12060                         break;
12061                 offset += sectorsize;
12062         }
12063         return ret;
12064 }
12065
12066 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12067                                       struct btrfs_root *csum_root,
12068                                       struct btrfs_root *cur_root)
12069 {
12070         struct btrfs_path path;
12071         struct btrfs_key key;
12072         struct extent_buffer *node;
12073         struct btrfs_file_extent_item *fi;
12074         char *buf = NULL;
12075         u64 start = 0;
12076         u64 len = 0;
12077         int slot = 0;
12078         int ret = 0;
12079
12080         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12081         if (!buf)
12082                 return -ENOMEM;
12083
12084         btrfs_init_path(&path);
12085         key.objectid = 0;
12086         key.offset = 0;
12087         key.type = 0;
12088         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12089         if (ret < 0)
12090                 goto out;
12091         /* Iterate all regular file extents and fill its csum */
12092         while (1) {
12093                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12094
12095                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12096                         goto next;
12097                 node = path.nodes[0];
12098                 slot = path.slots[0];
12099                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12100                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12101                         goto next;
12102                 start = btrfs_file_extent_disk_bytenr(node, fi);
12103                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12104
12105                 ret = populate_csum(trans, csum_root, buf, start, len);
12106                 if (ret == -EEXIST)
12107                         ret = 0;
12108                 if (ret < 0)
12109                         goto out;
12110 next:
12111                 /*
12112                  * TODO: if next leaf is corrupted, jump to nearest next valid
12113                  * leaf.
12114                  */
12115                 ret = btrfs_next_item(cur_root, &path);
12116                 if (ret < 0)
12117                         goto out;
12118                 if (ret > 0) {
12119                         ret = 0;
12120                         goto out;
12121                 }
12122         }
12123
12124 out:
12125         btrfs_release_path(&path);
12126         free(buf);
12127         return ret;
12128 }
12129
12130 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12131                                   struct btrfs_root *csum_root)
12132 {
12133         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12134         struct btrfs_path path;
12135         struct btrfs_root *tree_root = fs_info->tree_root;
12136         struct btrfs_root *cur_root;
12137         struct extent_buffer *node;
12138         struct btrfs_key key;
12139         int slot = 0;
12140         int ret = 0;
12141
12142         btrfs_init_path(&path);
12143         key.objectid = BTRFS_FS_TREE_OBJECTID;
12144         key.offset = 0;
12145         key.type = BTRFS_ROOT_ITEM_KEY;
12146         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12147         if (ret < 0)
12148                 goto out;
12149         if (ret > 0) {
12150                 ret = -ENOENT;
12151                 goto out;
12152         }
12153
12154         while (1) {
12155                 node = path.nodes[0];
12156                 slot = path.slots[0];
12157                 btrfs_item_key_to_cpu(node, &key, slot);
12158                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12159                         goto out;
12160                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12161                         goto next;
12162                 if (!is_fstree(key.objectid))
12163                         goto next;
12164                 key.offset = (u64)-1;
12165
12166                 cur_root = btrfs_read_fs_root(fs_info, &key);
12167                 if (IS_ERR(cur_root) || !cur_root) {
12168                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12169                                 key.objectid);
12170                         goto out;
12171                 }
12172                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12173                                 cur_root);
12174                 if (ret < 0)
12175                         goto out;
12176 next:
12177                 ret = btrfs_next_item(tree_root, &path);
12178                 if (ret > 0) {
12179                         ret = 0;
12180                         goto out;
12181                 }
12182                 if (ret < 0)
12183                         goto out;
12184         }
12185
12186 out:
12187         btrfs_release_path(&path);
12188         return ret;
12189 }
12190
12191 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12192                                       struct btrfs_root *csum_root)
12193 {
12194         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12195         struct btrfs_path path;
12196         struct btrfs_extent_item *ei;
12197         struct extent_buffer *leaf;
12198         char *buf;
12199         struct btrfs_key key;
12200         int ret;
12201
12202         btrfs_init_path(&path);
12203         key.objectid = 0;
12204         key.type = BTRFS_EXTENT_ITEM_KEY;
12205         key.offset = 0;
12206         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12207         if (ret < 0) {
12208                 btrfs_release_path(&path);
12209                 return ret;
12210         }
12211
12212         buf = malloc(csum_root->sectorsize);
12213         if (!buf) {
12214                 btrfs_release_path(&path);
12215                 return -ENOMEM;
12216         }
12217
12218         while (1) {
12219                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12220                         ret = btrfs_next_leaf(extent_root, &path);
12221                         if (ret < 0)
12222                                 break;
12223                         if (ret) {
12224                                 ret = 0;
12225                                 break;
12226                         }
12227                 }
12228                 leaf = path.nodes[0];
12229
12230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12231                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12232                         path.slots[0]++;
12233                         continue;
12234                 }
12235
12236                 ei = btrfs_item_ptr(leaf, path.slots[0],
12237                                     struct btrfs_extent_item);
12238                 if (!(btrfs_extent_flags(leaf, ei) &
12239                       BTRFS_EXTENT_FLAG_DATA)) {
12240                         path.slots[0]++;
12241                         continue;
12242                 }
12243
12244                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12245                                     key.offset);
12246                 if (ret)
12247                         break;
12248                 path.slots[0]++;
12249         }
12250
12251         btrfs_release_path(&path);
12252         free(buf);
12253         return ret;
12254 }
12255
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * When the extent tree has just been re-initialized it no longer carries
 * any extent info, so it cannot be used as the source.  In that case the
 * caller sets @search_fs_tree and the fs/subvolume trees are walked instead;
 * otherwise the extent tree is walked.
 *
 * Returns the result of the selected walker.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
12272
12273 static void free_roots_info_cache(void)
12274 {
12275         if (!roots_info_cache)
12276                 return;
12277
12278         while (!cache_tree_empty(roots_info_cache)) {
12279                 struct cache_extent *entry;
12280                 struct root_item_info *rii;
12281
12282                 entry = first_cache_extent(roots_info_cache);
12283                 if (!entry)
12284                         break;
12285                 remove_cache_extent(roots_info_cache, entry);
12286                 rii = container_of(entry, struct root_item_info, cache_extent);
12287                 free(rii);
12288         }
12289
12290         free(roots_info_cache);
12291         roots_info_cache = NULL;
12292 }
12293
12294 static int build_roots_info_cache(struct btrfs_fs_info *info)
12295 {
12296         int ret = 0;
12297         struct btrfs_key key;
12298         struct extent_buffer *leaf;
12299         struct btrfs_path path;
12300
12301         if (!roots_info_cache) {
12302                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12303                 if (!roots_info_cache)
12304                         return -ENOMEM;
12305                 cache_tree_init(roots_info_cache);
12306         }
12307
12308         btrfs_init_path(&path);
12309         key.objectid = 0;
12310         key.type = BTRFS_EXTENT_ITEM_KEY;
12311         key.offset = 0;
12312         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12313         if (ret < 0)
12314                 goto out;
12315         leaf = path.nodes[0];
12316
12317         while (1) {
12318                 struct btrfs_key found_key;
12319                 struct btrfs_extent_item *ei;
12320                 struct btrfs_extent_inline_ref *iref;
12321                 int slot = path.slots[0];
12322                 int type;
12323                 u64 flags;
12324                 u64 root_id;
12325                 u8 level;
12326                 struct cache_extent *entry;
12327                 struct root_item_info *rii;
12328
12329                 if (slot >= btrfs_header_nritems(leaf)) {
12330                         ret = btrfs_next_leaf(info->extent_root, &path);
12331                         if (ret < 0) {
12332                                 break;
12333                         } else if (ret) {
12334                                 ret = 0;
12335                                 break;
12336                         }
12337                         leaf = path.nodes[0];
12338                         slot = path.slots[0];
12339                 }
12340
12341                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12342
12343                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12344                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12345                         goto next;
12346
12347                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12348                 flags = btrfs_extent_flags(leaf, ei);
12349
12350                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12351                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12352                         goto next;
12353
12354                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12355                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12356                         level = found_key.offset;
12357                 } else {
12358                         struct btrfs_tree_block_info *binfo;
12359
12360                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12361                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12362                         level = btrfs_tree_block_level(leaf, binfo);
12363                 }
12364
12365                 /*
12366                  * For a root extent, it must be of the following type and the
12367                  * first (and only one) iref in the item.
12368                  */
12369                 type = btrfs_extent_inline_ref_type(leaf, iref);
12370                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12371                         goto next;
12372
12373                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12374                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12375                 if (!entry) {
12376                         rii = malloc(sizeof(struct root_item_info));
12377                         if (!rii) {
12378                                 ret = -ENOMEM;
12379                                 goto out;
12380                         }
12381                         rii->cache_extent.start = root_id;
12382                         rii->cache_extent.size = 1;
12383                         rii->level = (u8)-1;
12384                         entry = &rii->cache_extent;
12385                         ret = insert_cache_extent(roots_info_cache, entry);
12386                         ASSERT(ret == 0);
12387                 } else {
12388                         rii = container_of(entry, struct root_item_info,
12389                                            cache_extent);
12390                 }
12391
12392                 ASSERT(rii->cache_extent.start == root_id);
12393                 ASSERT(rii->cache_extent.size == 1);
12394
12395                 if (level > rii->level || rii->level == (u8)-1) {
12396                         rii->level = level;
12397                         rii->bytenr = found_key.objectid;
12398                         rii->gen = btrfs_extent_generation(leaf, ei);
12399                         rii->node_count = 1;
12400                 } else if (level == rii->level) {
12401                         rii->node_count++;
12402                 }
12403 next:
12404                 path.slots[0]++;
12405         }
12406
12407 out:
12408         btrfs_release_path(&path);
12409
12410         return ret;
12411 }
12412
12413 static int maybe_repair_root_item(struct btrfs_path *path,
12414                                   const struct btrfs_key *root_key,
12415                                   const int read_only_mode)
12416 {
12417         const u64 root_id = root_key->objectid;
12418         struct cache_extent *entry;
12419         struct root_item_info *rii;
12420         struct btrfs_root_item ri;
12421         unsigned long offset;
12422
12423         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12424         if (!entry) {
12425                 fprintf(stderr,
12426                         "Error: could not find extent items for root %llu\n",
12427                         root_key->objectid);
12428                 return -ENOENT;
12429         }
12430
12431         rii = container_of(entry, struct root_item_info, cache_extent);
12432         ASSERT(rii->cache_extent.start == root_id);
12433         ASSERT(rii->cache_extent.size == 1);
12434
12435         if (rii->node_count != 1) {
12436                 fprintf(stderr,
12437                         "Error: could not find btree root extent for root %llu\n",
12438                         root_id);
12439                 return -ENOENT;
12440         }
12441
12442         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12443         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12444
12445         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12446             btrfs_root_level(&ri) != rii->level ||
12447             btrfs_root_generation(&ri) != rii->gen) {
12448
12449                 /*
12450                  * If we're in repair mode but our caller told us to not update
12451                  * the root item, i.e. just check if it needs to be updated, don't
12452                  * print this message, since the caller will call us again shortly
12453                  * for the same root item without read only mode (the caller will
12454                  * open a transaction first).
12455                  */
12456                 if (!(read_only_mode && repair))
12457                         fprintf(stderr,
12458                                 "%sroot item for root %llu,"
12459                                 " current bytenr %llu, current gen %llu, current level %u,"
12460                                 " new bytenr %llu, new gen %llu, new level %u\n",
12461                                 (read_only_mode ? "" : "fixing "),
12462                                 root_id,
12463                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12464                                 btrfs_root_level(&ri),
12465                                 rii->bytenr, rii->gen, rii->level);
12466
12467                 if (btrfs_root_generation(&ri) > rii->gen) {
12468                         fprintf(stderr,
12469                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12470                                 root_id, btrfs_root_generation(&ri), rii->gen);
12471                         return -EINVAL;
12472                 }
12473
12474                 if (!read_only_mode) {
12475                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12476                         btrfs_set_root_level(&ri, rii->level);
12477                         btrfs_set_root_generation(&ri, rii->gen);
12478                         write_extent_buffer(path->nodes[0], &ri,
12479                                             offset, sizeof(ri));
12480                 }
12481
12482                 return 1;
12483         }
12484
12485         return 0;
12486 }
12487
12488 /*
12489  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12490  * caused read-only snapshots to be corrupted if they were created at a moment
12491  * when the source subvolume/snapshot had orphan items. The issue was that the
12492  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12493  * node instead of the post orphan cleanup root node.
12494  * So this function, and its callees, just detects and fixes those cases. Even
12495  * though the regression was for read-only snapshots, this function applies to
12496  * any snapshot/subvolume root.
12497  * This must be run before any other repair code - not doing it so, makes other
12498  * repair code delete or modify backrefs in the extent tree for example, which
12499  * will result in an inconsistent fs after repairing the root items.
12500  */
12501 static int repair_root_items(struct btrfs_fs_info *info)
12502 {
12503         struct btrfs_path path;
12504         struct btrfs_key key;
12505         struct extent_buffer *leaf;
12506         struct btrfs_trans_handle *trans = NULL;
12507         int ret = 0;
12508         int bad_roots = 0;
12509         int need_trans = 0;
12510
12511         btrfs_init_path(&path);
12512
12513         ret = build_roots_info_cache(info);
12514         if (ret)
12515                 goto out;
12516
12517         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12518         key.type = BTRFS_ROOT_ITEM_KEY;
12519         key.offset = 0;
12520
12521 again:
12522         /*
12523          * Avoid opening and committing transactions if a leaf doesn't have
12524          * any root items that need to be fixed, so that we avoid rotating
12525          * backup roots unnecessarily.
12526          */
12527         if (need_trans) {
12528                 trans = btrfs_start_transaction(info->tree_root, 1);
12529                 if (IS_ERR(trans)) {
12530                         ret = PTR_ERR(trans);
12531                         goto out;
12532                 }
12533         }
12534
12535         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12536                                 0, trans ? 1 : 0);
12537         if (ret < 0)
12538                 goto out;
12539         leaf = path.nodes[0];
12540
12541         while (1) {
12542                 struct btrfs_key found_key;
12543
12544                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12545                         int no_more_keys = find_next_key(&path, &key);
12546
12547                         btrfs_release_path(&path);
12548                         if (trans) {
12549                                 ret = btrfs_commit_transaction(trans,
12550                                                                info->tree_root);
12551                                 trans = NULL;
12552                                 if (ret < 0)
12553                                         goto out;
12554                         }
12555                         need_trans = 0;
12556                         if (no_more_keys)
12557                                 break;
12558                         goto again;
12559                 }
12560
12561                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12562
12563                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12564                         goto next;
12565                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12566                         goto next;
12567
12568                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12569                 if (ret < 0)
12570                         goto out;
12571                 if (ret) {
12572                         if (!trans && repair) {
12573                                 need_trans = 1;
12574                                 key = found_key;
12575                                 btrfs_release_path(&path);
12576                                 goto again;
12577                         }
12578                         bad_roots++;
12579                 }
12580 next:
12581                 path.slots[0]++;
12582         }
12583         ret = 0;
12584 out:
12585         free_roots_info_cache();
12586         btrfs_release_path(&path);
12587         if (trans)
12588                 btrfs_commit_transaction(trans, info->tree_root);
12589         if (ret < 0)
12590                 return ret;
12591
12592         return bad_roots;
12593 }
12594
12595 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12596 {
12597         struct btrfs_trans_handle *trans;
12598         struct btrfs_block_group_cache *bg_cache;
12599         u64 current = 0;
12600         int ret = 0;
12601
12602         /* Clear all free space cache inodes and its extent data */
12603         while (1) {
12604                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12605                 if (!bg_cache)
12606                         break;
12607                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12608                 if (ret < 0)
12609                         return ret;
12610                 current = bg_cache->key.objectid + bg_cache->key.offset;
12611         }
12612
12613         /* Don't forget to set cache_generation to -1 */
12614         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12615         if (IS_ERR(trans)) {
12616                 error("failed to update super block cache generation");
12617                 return PTR_ERR(trans);
12618         }
12619         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12620         btrfs_commit_transaction(trans, fs_info->tree_root);
12621
12622         return ret;
12623 }
12624
/*
 * Help text for "btrfs check", as a NULL-terminated array of lines.  It is
 * handed to usage() when an unknown option or 'h' is parsed in cmd_check().
 * NOTE(review): the first entries look like synopsis, short description and
 * long description, followed by an empty separator and the option list -
 * confirm the exact layout expected by usage().
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12655
12656 int cmd_check(int argc, char **argv)
12657 {
12658         struct cache_tree root_cache;
12659         struct btrfs_root *root;
12660         struct btrfs_fs_info *info;
12661         u64 bytenr = 0;
12662         u64 subvolid = 0;
12663         u64 tree_root_bytenr = 0;
12664         u64 chunk_root_bytenr = 0;
12665         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12666         int ret;
12667         int err = 0;
12668         u64 num;
12669         int init_csum_tree = 0;
12670         int readonly = 0;
12671         int clear_space_cache = 0;
12672         int qgroup_report = 0;
12673         int qgroups_repaired = 0;
12674         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12675
12676         while(1) {
12677                 int c;
12678                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12679                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12680                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12681                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12682                 static const struct option long_options[] = {
12683                         { "super", required_argument, NULL, 's' },
12684                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12685                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12686                         { "init-csum-tree", no_argument, NULL,
12687                                 GETOPT_VAL_INIT_CSUM },
12688                         { "init-extent-tree", no_argument, NULL,
12689                                 GETOPT_VAL_INIT_EXTENT },
12690                         { "check-data-csum", no_argument, NULL,
12691                                 GETOPT_VAL_CHECK_CSUM },
12692                         { "backup", no_argument, NULL, 'b' },
12693                         { "subvol-extents", required_argument, NULL, 'E' },
12694                         { "qgroup-report", no_argument, NULL, 'Q' },
12695                         { "tree-root", required_argument, NULL, 'r' },
12696                         { "chunk-root", required_argument, NULL,
12697                                 GETOPT_VAL_CHUNK_TREE },
12698                         { "progress", no_argument, NULL, 'p' },
12699                         { "mode", required_argument, NULL,
12700                                 GETOPT_VAL_MODE },
12701                         { "clear-space-cache", required_argument, NULL,
12702                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12703                         { NULL, 0, NULL, 0}
12704                 };
12705
12706                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12707                 if (c < 0)
12708                         break;
12709                 switch(c) {
12710                         case 'a': /* ignored */ break;
12711                         case 'b':
12712                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12713                                 break;
12714                         case 's':
12715                                 num = arg_strtou64(optarg);
12716                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12717                                         error(
12718                                         "super mirror should be less than %d",
12719                                                 BTRFS_SUPER_MIRROR_MAX);
12720                                         exit(1);
12721                                 }
12722                                 bytenr = btrfs_sb_offset(((int)num));
12723                                 printf("using SB copy %llu, bytenr %llu\n", num,
12724                                        (unsigned long long)bytenr);
12725                                 break;
12726                         case 'Q':
12727                                 qgroup_report = 1;
12728                                 break;
12729                         case 'E':
12730                                 subvolid = arg_strtou64(optarg);
12731                                 break;
12732                         case 'r':
12733                                 tree_root_bytenr = arg_strtou64(optarg);
12734                                 break;
12735                         case GETOPT_VAL_CHUNK_TREE:
12736                                 chunk_root_bytenr = arg_strtou64(optarg);
12737                                 break;
12738                         case 'p':
12739                                 ctx.progress_enabled = true;
12740                                 break;
12741                         case '?':
12742                         case 'h':
12743                                 usage(cmd_check_usage);
12744                         case GETOPT_VAL_REPAIR:
12745                                 printf("enabling repair mode\n");
12746                                 repair = 1;
12747                                 ctree_flags |= OPEN_CTREE_WRITES;
12748                                 break;
12749                         case GETOPT_VAL_READONLY:
12750                                 readonly = 1;
12751                                 break;
12752                         case GETOPT_VAL_INIT_CSUM:
12753                                 printf("Creating a new CRC tree\n");
12754                                 init_csum_tree = 1;
12755                                 repair = 1;
12756                                 ctree_flags |= OPEN_CTREE_WRITES;
12757                                 break;
12758                         case GETOPT_VAL_INIT_EXTENT:
12759                                 init_extent_tree = 1;
12760                                 ctree_flags |= (OPEN_CTREE_WRITES |
12761                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12762                                 repair = 1;
12763                                 break;
12764                         case GETOPT_VAL_CHECK_CSUM:
12765                                 check_data_csum = 1;
12766                                 break;
12767                         case GETOPT_VAL_MODE:
12768                                 check_mode = parse_check_mode(optarg);
12769                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12770                                         error("unknown mode: %s", optarg);
12771                                         exit(1);
12772                                 }
12773                                 break;
12774                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12775                                 if (strcmp(optarg, "v1") == 0) {
12776                                         clear_space_cache = 1;
12777                                 } else if (strcmp(optarg, "v2") == 0) {
12778                                         clear_space_cache = 2;
12779                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12780                                 } else {
12781                                         error(
12782                 "invalid argument to --clear-space-cache, must be v1 or v2");
12783                                         exit(1);
12784                                 }
12785                                 ctree_flags |= OPEN_CTREE_WRITES;
12786                                 break;
12787                 }
12788         }
12789
12790         if (check_argc_exact(argc - optind, 1))
12791                 usage(cmd_check_usage);
12792
12793         if (ctx.progress_enabled) {
12794                 ctx.tp = TASK_NOTHING;
12795                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12796         }
12797
12798         /* This check is the only reason for --readonly to exist */
12799         if (readonly && repair) {
12800                 error("repair options are not compatible with --readonly");
12801                 exit(1);
12802         }
12803
12804         /*
12805          * Not supported yet
12806          */
12807         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12808                 error("low memory mode doesn't support repair yet");
12809                 exit(1);
12810         }
12811
12812         radix_tree_init();
12813         cache_tree_init(&root_cache);
12814
12815         if((ret = check_mounted(argv[optind])) < 0) {
12816                 error("could not check mount status: %s", strerror(-ret));
12817                 err |= !!ret;
12818                 goto err_out;
12819         } else if(ret) {
12820                 error("%s is currently mounted, aborting", argv[optind]);
12821                 ret = -EBUSY;
12822                 err |= !!ret;
12823                 goto err_out;
12824         }
12825
12826         /* only allow partial opening under repair mode */
12827         if (repair)
12828                 ctree_flags |= OPEN_CTREE_PARTIAL;
12829
12830         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12831                                   chunk_root_bytenr, ctree_flags);
12832         if (!info) {
12833                 error("cannot open file system");
12834                 ret = -EIO;
12835                 err |= !!ret;
12836                 goto err_out;
12837         }
12838
12839         global_info = info;
12840         root = info->fs_root;
12841         if (clear_space_cache == 1) {
12842                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12843                         error(
12844                 "free space cache v2 detected, use --clear-space-cache v2");
12845                         ret = 1;
12846                         goto close_out;
12847                 }
12848                 printf("Clearing free space cache\n");
12849                 ret = clear_free_space_cache(info);
12850                 if (ret) {
12851                         error("failed to clear free space cache");
12852                         ret = 1;
12853                 } else {
12854                         printf("Free space cache cleared\n");
12855                 }
12856                 goto close_out;
12857         } else if (clear_space_cache == 2) {
12858                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12859                         printf("no free space cache v2 to clear\n");
12860                         ret = 0;
12861                         goto close_out;
12862                 }
12863                 printf("Clear free space cache v2\n");
12864                 ret = btrfs_clear_free_space_tree(info);
12865                 if (ret) {
12866                         error("failed to clear free space cache v2: %d", ret);
12867                         ret = 1;
12868                 } else {
12869                         printf("free space cache v2 cleared\n");
12870                 }
12871                 goto close_out;
12872         }
12873
12874         /*
12875          * repair mode will force us to commit transaction which
12876          * will make us fail to load log tree when mounting.
12877          */
12878         if (repair && btrfs_super_log_root(info->super_copy)) {
12879                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12880                 if (!ret) {
12881                         ret = 1;
12882                         err |= !!ret;
12883                         goto close_out;
12884                 }
12885                 ret = zero_log_tree(root);
12886                 err |= !!ret;
12887                 if (ret) {
12888                         error("failed to zero log tree: %d", ret);
12889                         goto close_out;
12890                 }
12891         }
12892
12893         uuid_unparse(info->super_copy->fsid, uuidbuf);
12894         if (qgroup_report) {
12895                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12896                        uuidbuf);
12897                 ret = qgroup_verify_all(info);
12898                 err |= !!ret;
12899                 if (ret == 0)
12900                         report_qgroups(1);
12901                 goto close_out;
12902         }
12903         if (subvolid) {
12904                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12905                        subvolid, argv[optind], uuidbuf);
12906                 ret = print_extent_state(info, subvolid);
12907                 err |= !!ret;
12908                 goto close_out;
12909         }
12910         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12911
12912         if (!extent_buffer_uptodate(info->tree_root->node) ||
12913             !extent_buffer_uptodate(info->dev_root->node) ||
12914             !extent_buffer_uptodate(info->chunk_root->node)) {
12915                 error("critical roots corrupted, unable to check the filesystem");
12916                 err |= !!ret;
12917                 ret = -EIO;
12918                 goto close_out;
12919         }
12920
12921         if (init_extent_tree || init_csum_tree) {
12922                 struct btrfs_trans_handle *trans;
12923
12924                 trans = btrfs_start_transaction(info->extent_root, 0);
12925                 if (IS_ERR(trans)) {
12926                         error("error starting transaction");
12927                         ret = PTR_ERR(trans);
12928                         err |= !!ret;
12929                         goto close_out;
12930                 }
12931
12932                 if (init_extent_tree) {
12933                         printf("Creating a new extent tree\n");
12934                         ret = reinit_extent_tree(trans, info);
12935                         err |= !!ret;
12936                         if (ret)
12937                                 goto close_out;
12938                 }
12939
12940                 if (init_csum_tree) {
12941                         printf("Reinitialize checksum tree\n");
12942                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12943                         if (ret) {
12944                                 error("checksum tree initialization failed: %d",
12945                                                 ret);
12946                                 ret = -EIO;
12947                                 err |= !!ret;
12948                                 goto close_out;
12949                         }
12950
12951                         ret = fill_csum_tree(trans, info->csum_root,
12952                                              init_extent_tree);
12953                         err |= !!ret;
12954                         if (ret) {
12955                                 error("checksum tree refilling failed: %d", ret);
12956                                 return -EIO;
12957                         }
12958                 }
12959                 /*
12960                  * Ok now we commit and run the normal fsck, which will add
12961                  * extent entries for all of the items it finds.
12962                  */
12963                 ret = btrfs_commit_transaction(trans, info->extent_root);
12964                 err |= !!ret;
12965                 if (ret)
12966                         goto close_out;
12967         }
12968         if (!extent_buffer_uptodate(info->extent_root->node)) {
12969                 error("critical: extent_root, unable to check the filesystem");
12970                 ret = -EIO;
12971                 err |= !!ret;
12972                 goto close_out;
12973         }
12974         if (!extent_buffer_uptodate(info->csum_root->node)) {
12975                 error("critical: csum_root, unable to check the filesystem");
12976                 ret = -EIO;
12977                 err |= !!ret;
12978                 goto close_out;
12979         }
12980
12981         if (!ctx.progress_enabled)
12982                 fprintf(stderr, "checking extents\n");
12983         if (check_mode == CHECK_MODE_LOWMEM)
12984                 ret = check_chunks_and_extents_v2(root);
12985         else
12986                 ret = check_chunks_and_extents(root);
12987         err |= !!ret;
12988         if (ret)
12989                 error(
12990                 "errors found in extent allocation tree or chunk allocation");
12991
12992         ret = repair_root_items(info);
12993         err |= !!ret;
12994         if (ret < 0) {
12995                 error("failed to repair root items: %s", strerror(-ret));
12996                 goto close_out;
12997         }
12998         if (repair) {
12999                 fprintf(stderr, "Fixed %d roots.\n", ret);
13000                 ret = 0;
13001         } else if (ret > 0) {
13002                 fprintf(stderr,
13003                        "Found %d roots with an outdated root item.\n",
13004                        ret);
13005                 fprintf(stderr,
13006                         "Please run a filesystem check with the option --repair to fix them.\n");
13007                 ret = 1;
13008                 err |= !!ret;
13009                 goto close_out;
13010         }
13011
13012         if (!ctx.progress_enabled) {
13013                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13014                         fprintf(stderr, "checking free space tree\n");
13015                 else
13016                         fprintf(stderr, "checking free space cache\n");
13017         }
13018         ret = check_space_cache(root);
13019         err |= !!ret;
13020         if (ret) {
13021                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13022                         error("errors found in free space tree");
13023                 else
13024                         error("errors found in free space cache");
13025                 goto out;
13026         }
13027
13028         /*
13029          * We used to have to have these hole extents in between our real
13030          * extents so if we don't have this flag set we need to make sure there
13031          * are no gaps in the file extents for inodes, otherwise we can just
13032          * ignore it when this happens.
13033          */
13034         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13035         if (!ctx.progress_enabled)
13036                 fprintf(stderr, "checking fs roots\n");
13037         if (check_mode == CHECK_MODE_LOWMEM)
13038                 ret = check_fs_roots_v2(root->fs_info);
13039         else
13040                 ret = check_fs_roots(root, &root_cache);
13041         err |= !!ret;
13042         if (ret) {
13043                 error("errors found in fs roots");
13044                 goto out;
13045         }
13046
13047         fprintf(stderr, "checking csums\n");
13048         ret = check_csums(root);
13049         err |= !!ret;
13050         if (ret) {
13051                 error("errors found in csum tree");
13052                 goto out;
13053         }
13054
13055         fprintf(stderr, "checking root refs\n");
13056         /* For low memory mode, check_fs_roots_v2 handles root refs */
13057         if (check_mode != CHECK_MODE_LOWMEM) {
13058                 ret = check_root_refs(root, &root_cache);
13059                 err |= !!ret;
13060                 if (ret) {
13061                         error("errors found in root refs");
13062                         goto out;
13063                 }
13064         }
13065
13066         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13067                 struct extent_buffer *eb;
13068
13069                 eb = list_first_entry(&root->fs_info->recow_ebs,
13070                                       struct extent_buffer, recow);
13071                 list_del_init(&eb->recow);
13072                 ret = recow_extent_buffer(root, eb);
13073                 err |= !!ret;
13074                 if (ret) {
13075                         error("fails to fix transid errors");
13076                         break;
13077                 }
13078         }
13079
13080         while (!list_empty(&delete_items)) {
13081                 struct bad_item *bad;
13082
13083                 bad = list_first_entry(&delete_items, struct bad_item, list);
13084                 list_del_init(&bad->list);
13085                 if (repair) {
13086                         ret = delete_bad_item(root, bad);
13087                         err |= !!ret;
13088                 }
13089                 free(bad);
13090         }
13091
13092         if (info->quota_enabled) {
13093                 fprintf(stderr, "checking quota groups\n");
13094                 ret = qgroup_verify_all(info);
13095                 err |= !!ret;
13096                 if (ret) {
13097                         error("failed to check quota groups");
13098                         goto out;
13099                 }
13100                 report_qgroups(0);
13101                 ret = repair_qgroups(info, &qgroups_repaired);
13102                 err |= !!ret;
13103                 if (err) {
13104                         error("failed to repair quota groups");
13105                         goto out;
13106                 }
13107                 ret = 0;
13108         }
13109
13110         if (!list_empty(&root->fs_info->recow_ebs)) {
13111                 error("transid errors in file system");
13112                 ret = 1;
13113                 err |= !!ret;
13114         }
13115 out:
13116         if (found_old_backref) { /*
13117                  * there was a disk format change when mixed
13118                  * backref was in testing tree. The old format
13119                  * existed about one week.
13120                  */
13121                 printf("\n * Found old mixed backref format. "
13122                        "The old format is not supported! *"
13123                        "\n * Please mount the FS in readonly mode, "
13124                        "backup data and re-format the FS. *\n\n");
13125                 err |= 1;
13126         }
13127         printf("found %llu bytes used, ",
13128                (unsigned long long)bytes_used);
13129         if (err)
13130                 printf("error(s) found\n");
13131         else
13132                 printf("no error found\n");
13133         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13134         printf("total tree bytes: %llu\n",
13135                (unsigned long long)total_btree_bytes);
13136         printf("total fs tree bytes: %llu\n",
13137                (unsigned long long)total_fs_tree_bytes);
13138         printf("total extent tree bytes: %llu\n",
13139                (unsigned long long)total_extent_tree_bytes);
13140         printf("btree space waste bytes: %llu\n",
13141                (unsigned long long)btree_space_waste);
13142         printf("file data blocks allocated: %llu\n referenced %llu\n",
13143                 (unsigned long long)data_bytes_allocated,
13144                 (unsigned long long)data_bytes_referenced);
13145
13146         free_qgroup_counts();
13147         free_root_recs_tree(&root_cache);
13148 close_out:
13149         close_ctree(root);
13150 err_out:
13151         if (ctx.progress_enabled)
13152                 task_deinit(ctx.info);
13153
13154         return err;
13155 }