ec56bbabcc46e882e8492d82edd6952395ad5ab8
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases reported by the progress indicator.  print_status_check() uses the
 * value as an index into its label table, and returns without printing for
 * TASK_NOTHING.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* must stay the last element */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Selectable check implementations.  parse_check_mode() maps the strings
 * "lowmem", "orig" and "original" onto these values and yields
 * CHECK_MODE_UNKNOWN for anything else.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	/* alias, not an additional mode */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL,
};

/* Mode currently in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits; bit 0 is not assigned in this group. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not set" value for extent_record::flag_block_full_backref,
 * which is two bits wide and can therefore hold it.
 */
enum {
	FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Bits stored in a backref's `errors` field; decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Bits stored in inode_record::errors; decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values; they are used
 * internally for error classification.  Every flag has its own bit:
 * CROSSING_STRIPE_BOUNDARY previously shared (1 << 4) with
 * REFERENCER_MISMATCH, which made the two conditions indistinguishable
 * once OR-ed into an error mask, so it now uses the free bit 9.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Companion of print_status_check(): emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	putchar('\n');
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs { /* per-level cache of the last tree block that was looked up */
1861         u64 bytenr[BTRFS_MAX_LEVEL]; /* start of the cached block at each level */
1862         u64 refs[BTRFS_MAX_LEVEL]; /* reference count returned by btrfs_lookup_extent_info() */
1863         int need_check[BTRFS_MAX_LEVEL]; /* non-zero if the walking root should descend into the block */
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 /*
1872  * Returns >0  Found error, not fatal, should continue
1873  * Returns <0  Fatal error, must exit the whole check
1874  * Returns 0   No errors found
1875  */
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877                                struct node_refs *nrefs, int *level, int ext_ref)
1878 {
1879         struct extent_buffer *cur = path->nodes[0];
1880         struct btrfs_key key;
1881         u64 cur_bytenr;
1882         u32 nritems;
1883         u64 first_ino = 0;
1884         int root_level = btrfs_header_level(root->node);
1885         int i;
1886         int ret = 0; /* Final return value */
1887         int err = 0; /* Positive error bitmap */
1888
1889         cur_bytenr = cur->start;
1890
1891         /* skip to first inode item or the first inode number change */
1892         nritems = btrfs_header_nritems(cur);
1893         for (i = 0; i < nritems; i++) {
1894                 btrfs_item_key_to_cpu(cur, &key, i);
1895                 if (i == 0)
1896                         first_ino = key.objectid;
1897                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898                     (first_ino && first_ino != key.objectid))
1899                         break;
1900         }
1901         if (i == nritems) {
1902                 path->slots[0] = nritems;
1903                 return 0;
1904         }
1905         path->slots[0] = i;
1906
1907 again:
1908         err |= check_inode_item(root, path, ext_ref);
1909
1910         if (err & LAST_ITEM)
1911                 goto out;
1912
1913         /* still have inode items in thie leaf */
1914         if (cur->start == cur_bytenr)
1915                 goto again;
1916
1917         /*
1918          * we have switched to another leaf, above nodes may
1919          * have changed, here walk down the path, if a node
1920          * or leaf is shared, check whether we can skip this
1921          * node or leaf.
1922          */
1923         for (i = root_level; i >= 0; i--) {
1924                 if (path->nodes[i]->start == nrefs->bytenr[i])
1925                         continue;
1926
1927                 ret = update_nodes_refs(root,
1928                                 path->nodes[i]->start,
1929                                 nrefs, i);
1930                 if (ret)
1931                         goto out;
1932
1933                 if (!nrefs->need_check[i]) {
1934                         *level += 1;
1935                         break;
1936                 }
1937         }
1938
1939         for (i = 0; i < *level; i++) {
1940                 free_extent_buffer(path->nodes[i]);
1941                 path->nodes[i] = NULL;
1942         }
1943 out:
1944         err &= ~LAST_ITEM;
1945         if (err && !ret)
1946                 ret = err;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         free_extent_buffer(next);
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214                             unsigned int ext_ref);
2215
2216 /*
2217  * Returns >0  Found error, should continue
2218  * Returns <0  Fatal error, must exit the whole check
2219  * Returns 0   No errors found
2220  */
2221 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2222                              int *level, struct node_refs *nrefs, int ext_ref)
2223 {
2224         enum btrfs_tree_block_status status;
2225         u64 bytenr;
2226         u64 ptr_gen;
2227         struct extent_buffer *next;
2228         struct extent_buffer *cur;
2229         u32 blocksize;
2230         int ret;
2231
2232         WARN_ON(*level < 0);
2233         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2234
2235         ret = update_nodes_refs(root, path->nodes[*level]->start,
2236                                 nrefs, *level);
2237         if (ret < 0)
2238                 return ret;
2239
2240         while (*level >= 0) {
2241                 WARN_ON(*level < 0);
2242                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243                 cur = path->nodes[*level];
2244
2245                 if (btrfs_header_level(cur) != *level)
2246                         WARN_ON(1);
2247
2248                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2249                         break;
2250                 /* Don't forgot to check leaf/node validation */
2251                 if (*level == 0) {
2252                         ret = btrfs_check_leaf(root, NULL, cur);
2253                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2254                                 ret = -EIO;
2255                                 break;
2256                         }
2257                         ret = process_one_leaf_v2(root, path, nrefs,
2258                                                   level, ext_ref);
2259                         break;
2260                 } else {
2261                         ret = btrfs_check_node(root, NULL, cur);
2262                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2263                                 ret = -EIO;
2264                                 break;
2265                         }
2266                 }
2267                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2268                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2269                 blocksize = root->nodesize;
2270
2271                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2272                 if (ret)
2273                         break;
2274                 if (!nrefs->need_check[*level - 1]) {
2275                         path->slots[*level]++;
2276                         continue;
2277                 }
2278
2279                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2280                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281                         free_extent_buffer(next);
2282                         reada_walk_down(root, cur, path->slots[*level]);
2283                         next = read_tree_block(root, bytenr, blocksize,
2284                                                ptr_gen);
2285                         if (!extent_buffer_uptodate(next)) {
2286                                 struct btrfs_key node_key;
2287
2288                                 btrfs_node_key_to_cpu(path->nodes[*level],
2289                                                       &node_key,
2290                                                       path->slots[*level]);
2291                                 btrfs_add_corrupt_extent_record(root->fs_info,
2292                                                 &node_key,
2293                                                 path->nodes[*level]->start,
2294                                                 root->nodesize, *level);
2295                                 ret = -EIO;
2296                                 break;
2297                         }
2298                 }
2299
2300                 ret = check_child_node(cur, path->slots[*level], next);
2301                 if (ret < 0) 
2302                         break;
2303
2304                 if (btrfs_is_leaf(next))
2305                         status = btrfs_check_leaf(root, NULL, next);
2306                 else
2307                         status = btrfs_check_node(root, NULL, next);
2308                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309                         free_extent_buffer(next);
2310                         ret = -EIO;
2311                         break;
2312                 }
2313
2314                 *level = *level - 1;
2315                 free_extent_buffer(path->nodes[*level]);
2316                 path->nodes[*level] = next;
2317                 path->slots[*level] = 0;
2318         }
2319         return ret;
2320 }
2321
2322 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2323                         struct walk_control *wc, int *level)
2324 {
2325         int i;
2326         struct extent_buffer *leaf;
2327
2328         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2329                 leaf = path->nodes[i];
2330                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2331                         path->slots[i]++;
2332                         *level = i;
2333                         return 0;
2334                 } else {
2335                         free_extent_buffer(path->nodes[*level]);
2336                         path->nodes[*level] = NULL;
2337                         BUG_ON(*level > wc->active_node);
2338                         if (*level == wc->active_node)
2339                                 leave_shared_node(root, wc, *level);
2340                         *level = i + 1;
2341                 }
2342         }
2343         return 1;
2344 }
2345
2346 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2347                            int *level)
2348 {
2349         int i;
2350         struct extent_buffer *leaf;
2351
2352         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2353                 leaf = path->nodes[i];
2354                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2355                         path->slots[i]++;
2356                         *level = i;
2357                         return 0;
2358                 } else {
2359                         free_extent_buffer(path->nodes[*level]);
2360                         path->nodes[*level] = NULL;
2361                         *level = i + 1;
2362                 }
2363         }
2364         return 1;
2365 }
2366
2367 static int check_root_dir(struct inode_record *rec)
2368 {
2369         struct inode_backref *backref;
2370         int ret = -1;
2371
2372         if (!rec->found_inode_item || rec->errors)
2373                 goto out;
2374         if (rec->nlink != 1 || rec->found_link != 0)
2375                 goto out;
2376         if (list_empty(&rec->backrefs))
2377                 goto out;
2378         backref = to_inode_backref(rec->backrefs.next);
2379         if (!backref->found_inode_ref)
2380                 goto out;
2381         if (backref->index != 0 || backref->namelen != 2 ||
2382             memcmp(backref->name, "..", 2))
2383                 goto out;
2384         if (backref->found_dir_index || backref->found_dir_item)
2385                 goto out;
2386         ret = 0;
2387 out:
2388         return ret;
2389 }
2390
2391 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2392                               struct btrfs_root *root, struct btrfs_path *path,
2393                               struct inode_record *rec)
2394 {
2395         struct btrfs_inode_item *ei;
2396         struct btrfs_key key;
2397         int ret;
2398
2399         key.objectid = rec->ino;
2400         key.type = BTRFS_INODE_ITEM_KEY;
2401         key.offset = (u64)-1;
2402
2403         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2404         if (ret < 0)
2405                 goto out;
2406         if (ret) {
2407                 if (!path->slots[0]) {
2408                         ret = -ENOENT;
2409                         goto out;
2410                 }
2411                 path->slots[0]--;
2412                 ret = 0;
2413         }
2414         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2415         if (key.objectid != rec->ino) {
2416                 ret = -ENOENT;
2417                 goto out;
2418         }
2419
2420         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2421                             struct btrfs_inode_item);
2422         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2423         btrfs_mark_buffer_dirty(path->nodes[0]);
2424         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2425         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2426                root->root_key.objectid);
2427 out:
2428         btrfs_release_path(path);
2429         return ret;
2430 }
2431
2432 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2433                                     struct btrfs_root *root,
2434                                     struct btrfs_path *path,
2435                                     struct inode_record *rec)
2436 {
2437         int ret;
2438
2439         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2440         btrfs_release_path(path);
2441         if (!ret)
2442                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2443         return ret;
2444 }
2445
2446 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2447                                struct btrfs_root *root,
2448                                struct btrfs_path *path,
2449                                struct inode_record *rec)
2450 {
2451         struct btrfs_inode_item *ei;
2452         struct btrfs_key key;
2453         int ret = 0;
2454
2455         key.objectid = rec->ino;
2456         key.type = BTRFS_INODE_ITEM_KEY;
2457         key.offset = 0;
2458
2459         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2460         if (ret) {
2461                 if (ret > 0)
2462                         ret = -ENOENT;
2463                 goto out;
2464         }
2465
2466         /* Since ret == 0, no need to check anything */
2467         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2468                             struct btrfs_inode_item);
2469         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2470         btrfs_mark_buffer_dirty(path->nodes[0]);
2471         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472         printf("reset nbytes for ino %llu root %llu\n",
2473                rec->ino, root->root_key.objectid);
2474 out:
2475         btrfs_release_path(path);
2476         return ret;
2477 }
2478
2479 static int add_missing_dir_index(struct btrfs_root *root,
2480                                  struct cache_tree *inode_cache,
2481                                  struct inode_record *rec,
2482                                  struct inode_backref *backref)
2483 {
2484         struct btrfs_path path;
2485         struct btrfs_trans_handle *trans;
2486         struct btrfs_dir_item *dir_item;
2487         struct extent_buffer *leaf;
2488         struct btrfs_key key;
2489         struct btrfs_disk_key disk_key;
2490         struct inode_record *dir_rec;
2491         unsigned long name_ptr;
2492         u32 data_size = sizeof(*dir_item) + backref->namelen;
2493         int ret;
2494
2495         trans = btrfs_start_transaction(root, 1);
2496         if (IS_ERR(trans))
2497                 return PTR_ERR(trans);
2498
2499         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2500                 (unsigned long long)rec->ino);
2501
2502         btrfs_init_path(&path);
2503         key.objectid = backref->dir;
2504         key.type = BTRFS_DIR_INDEX_KEY;
2505         key.offset = backref->index;
2506         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2507         BUG_ON(ret);
2508
2509         leaf = path.nodes[0];
2510         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2511
2512         disk_key.objectid = cpu_to_le64(rec->ino);
2513         disk_key.type = BTRFS_INODE_ITEM_KEY;
2514         disk_key.offset = 0;
2515
2516         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2517         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2518         btrfs_set_dir_data_len(leaf, dir_item, 0);
2519         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2520         name_ptr = (unsigned long)(dir_item + 1);
2521         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2522         btrfs_mark_buffer_dirty(leaf);
2523         btrfs_release_path(&path);
2524         btrfs_commit_transaction(trans, root);
2525
2526         backref->found_dir_index = 1;
2527         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2528         BUG_ON(IS_ERR(dir_rec));
2529         if (!dir_rec)
2530                 return 0;
2531         dir_rec->found_size += backref->namelen;
2532         if (dir_rec->found_size == dir_rec->isize &&
2533             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2534                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2535         if (dir_rec->found_size != dir_rec->isize)
2536                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2537
2538         return 0;
2539 }
2540
2541 static int delete_dir_index(struct btrfs_root *root,
2542                             struct inode_backref *backref)
2543 {
2544         struct btrfs_trans_handle *trans;
2545         struct btrfs_dir_item *di;
2546         struct btrfs_path path;
2547         int ret = 0;
2548
2549         trans = btrfs_start_transaction(root, 1);
2550         if (IS_ERR(trans))
2551                 return PTR_ERR(trans);
2552
2553         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2554                 (unsigned long long)backref->dir,
2555                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2556                 (unsigned long long)root->objectid);
2557
2558         btrfs_init_path(&path);
2559         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2560                                     backref->name, backref->namelen,
2561                                     backref->index, -1);
2562         if (IS_ERR(di)) {
2563                 ret = PTR_ERR(di);
2564                 btrfs_release_path(&path);
2565                 btrfs_commit_transaction(trans, root);
2566                 if (ret == -ENOENT)
2567                         return 0;
2568                 return ret;
2569         }
2570
2571         if (!di)
2572                 ret = btrfs_del_item(trans, root, &path);
2573         else
2574                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2575         BUG_ON(ret);
2576         btrfs_release_path(&path);
2577         btrfs_commit_transaction(trans, root);
2578         return ret;
2579 }
2580
2581 static int create_inode_item(struct btrfs_root *root,
2582                              struct inode_record *rec,
2583                              int root_dir)
2584 {
2585         struct btrfs_trans_handle *trans;
2586         struct btrfs_inode_item inode_item;
2587         time_t now = time(NULL);
2588         int ret;
2589
2590         trans = btrfs_start_transaction(root, 1);
2591         if (IS_ERR(trans)) {
2592                 ret = PTR_ERR(trans);
2593                 return ret;
2594         }
2595
2596         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2597                 "be incomplete, please check permissions and content after "
2598                 "the fsck completes.\n", (unsigned long long)root->objectid,
2599                 (unsigned long long)rec->ino);
2600
2601         memset(&inode_item, 0, sizeof(inode_item));
2602         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2603         if (root_dir)
2604                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2605         else
2606                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2607         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2608         if (rec->found_dir_item) {
2609                 if (rec->found_file_extent)
2610                         fprintf(stderr, "root %llu inode %llu has both a dir "
2611                                 "item and extents, unsure if it is a dir or a "
2612                                 "regular file so setting it as a directory\n",
2613                                 (unsigned long long)root->objectid,
2614                                 (unsigned long long)rec->ino);
2615                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2616                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2617         } else if (!rec->found_dir_item) {
2618                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2619                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2620         }
2621         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2622         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2623         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2624         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2625         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2626         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2627         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2628         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2629
2630         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2631         BUG_ON(ret);
2632         btrfs_commit_transaction(trans, root);
2633         return 0;
2634 }
2635
2636 static int repair_inode_backrefs(struct btrfs_root *root,
2637                                  struct inode_record *rec,
2638                                  struct cache_tree *inode_cache,
2639                                  int delete)
2640 {
2641         struct inode_backref *tmp, *backref;
2642         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2643         int ret = 0;
2644         int repaired = 0;
2645
2646         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2647                 if (!delete && rec->ino == root_dirid) {
2648                         if (!rec->found_inode_item) {
2649                                 ret = create_inode_item(root, rec, 1);
2650                                 if (ret)
2651                                         break;
2652                                 repaired++;
2653                         }
2654                 }
2655
2656                 /* Index 0 for root dir's are special, don't mess with it */
2657                 if (rec->ino == root_dirid && backref->index == 0)
2658                         continue;
2659
2660                 if (delete &&
2661                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2662                      (backref->found_dir_index && backref->found_inode_ref &&
2663                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2664                         ret = delete_dir_index(root, backref);
2665                         if (ret)
2666                                 break;
2667                         repaired++;
2668                         list_del(&backref->list);
2669                         free(backref);
2670                 }
2671
2672                 if (!delete && !backref->found_dir_index &&
2673                     backref->found_dir_item && backref->found_inode_ref) {
2674                         ret = add_missing_dir_index(root, inode_cache, rec,
2675                                                     backref);
2676                         if (ret)
2677                                 break;
2678                         repaired++;
2679                         if (backref->found_dir_item &&
2680                             backref->found_dir_index &&
2681                             backref->found_dir_index) {
2682                                 if (!backref->errors &&
2683                                     backref->found_inode_ref) {
2684                                         list_del(&backref->list);
2685                                         free(backref);
2686                                 }
2687                         }
2688                 }
2689
2690                 if (!delete && (!backref->found_dir_index &&
2691                                 !backref->found_dir_item &&
2692                                 backref->found_inode_ref)) {
2693                         struct btrfs_trans_handle *trans;
2694                         struct btrfs_key location;
2695
2696                         ret = check_dir_conflict(root, backref->name,
2697                                                  backref->namelen,
2698                                                  backref->dir,
2699                                                  backref->index);
2700                         if (ret) {
2701                                 /*
2702                                  * let nlink fixing routine to handle it,
2703                                  * which can do it better.
2704                                  */
2705                                 ret = 0;
2706                                 break;
2707                         }
2708                         location.objectid = rec->ino;
2709                         location.type = BTRFS_INODE_ITEM_KEY;
2710                         location.offset = 0;
2711
2712                         trans = btrfs_start_transaction(root, 1);
2713                         if (IS_ERR(trans)) {
2714                                 ret = PTR_ERR(trans);
2715                                 break;
2716                         }
2717                         fprintf(stderr, "adding missing dir index/item pair "
2718                                 "for inode %llu\n",
2719                                 (unsigned long long)rec->ino);
2720                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2721                                                     backref->namelen,
2722                                                     backref->dir, &location,
2723                                                     imode_to_type(rec->imode),
2724                                                     backref->index);
2725                         BUG_ON(ret);
2726                         btrfs_commit_transaction(trans, root);
2727                         repaired++;
2728                 }
2729
2730                 if (!delete && (backref->found_inode_ref &&
2731                                 backref->found_dir_index &&
2732                                 backref->found_dir_item &&
2733                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2734                                 !rec->found_inode_item)) {
2735                         ret = create_inode_item(root, rec, 0);
2736                         if (ret)
2737                                 break;
2738                         repaired++;
2739                 }
2740
2741         }
2742         return ret ? ret : repaired;
2743 }
2744
2745 /*
2746  * To determine the file type for nlink/inode_item repair
2747  *
2748  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2749  * Return -ENOENT if file type is not found.
2750  */
2751 static int find_file_type(struct inode_record *rec, u8 *type)
2752 {
2753         struct inode_backref *backref;
2754
2755         /* For inode item recovered case */
2756         if (rec->found_inode_item) {
2757                 *type = imode_to_type(rec->imode);
2758                 return 0;
2759         }
2760
2761         list_for_each_entry(backref, &rec->backrefs, list) {
2762                 if (backref->found_dir_index || backref->found_dir_item) {
2763                         *type = backref->filetype;
2764                         return 0;
2765                 }
2766         }
2767         return -ENOENT;
2768 }
2769
2770 /*
2771  * To determine the file name for nlink repair
2772  *
2773  * Return 0 if file name is found, set name and namelen.
2774  * Return -ENOENT if file name is not found.
2775  */
2776 static int find_file_name(struct inode_record *rec,
2777                           char *name, int *namelen)
2778 {
2779         struct inode_backref *backref;
2780
2781         list_for_each_entry(backref, &rec->backrefs, list) {
2782                 if (backref->found_dir_index || backref->found_dir_item ||
2783                     backref->found_inode_ref) {
2784                         memcpy(name, backref->name, backref->namelen);
2785                         *namelen = backref->namelen;
2786                         return 0;
2787                 }
2788         }
2789         return -ENOENT;
2790 }
2791
2792 /* Reset the nlink of the inode to the correct one */
2793 static int reset_nlink(struct btrfs_trans_handle *trans,
2794                        struct btrfs_root *root,
2795                        struct btrfs_path *path,
2796                        struct inode_record *rec)
2797 {
2798         struct inode_backref *backref;
2799         struct inode_backref *tmp;
2800         struct btrfs_key key;
2801         struct btrfs_inode_item *inode_item;
2802         int ret = 0;
2803
2804         /* We don't believe this either, reset it and iterate backref */
2805         rec->found_link = 0;
2806
2807         /* Remove all backref including the valid ones */
2808         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2809                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2810                                    backref->index, backref->name,
2811                                    backref->namelen, 0);
2812                 if (ret < 0)
2813                         goto out;
2814
2815                 /* remove invalid backref, so it won't be added back */
2816                 if (!(backref->found_dir_index &&
2817                       backref->found_dir_item &&
2818                       backref->found_inode_ref)) {
2819                         list_del(&backref->list);
2820                         free(backref);
2821                 } else {
2822                         rec->found_link++;
2823                 }
2824         }
2825
2826         /* Set nlink to 0 */
2827         key.objectid = rec->ino;
2828         key.type = BTRFS_INODE_ITEM_KEY;
2829         key.offset = 0;
2830         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2831         if (ret < 0)
2832                 goto out;
2833         if (ret > 0) {
2834                 ret = -ENOENT;
2835                 goto out;
2836         }
2837         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2838                                     struct btrfs_inode_item);
2839         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2840         btrfs_mark_buffer_dirty(path->nodes[0]);
2841         btrfs_release_path(path);
2842
2843         /*
2844          * Add back valid inode_ref/dir_item/dir_index,
2845          * add_link() will handle the nlink inc, so new nlink must be correct
2846          */
2847         list_for_each_entry(backref, &rec->backrefs, list) {
2848                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2849                                      backref->name, backref->namelen,
2850                                      backref->filetype, &backref->index, 1);
2851                 if (ret < 0)
2852                         goto out;
2853         }
2854 out:
2855         btrfs_release_path(path);
2856         return ret;
2857 }
2858
2859 static int get_highest_inode(struct btrfs_trans_handle *trans,
2860                                 struct btrfs_root *root,
2861                                 struct btrfs_path *path,
2862                                 u64 *highest_ino)
2863 {
2864         struct btrfs_key key, found_key;
2865         int ret;
2866
2867         btrfs_init_path(path);
2868         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2869         key.offset = -1;
2870         key.type = BTRFS_INODE_ITEM_KEY;
2871         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2872         if (ret == 1) {
2873                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2874                                 path->slots[0] - 1);
2875                 *highest_ino = found_key.objectid;
2876                 ret = 0;
2877         }
2878         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2879                 ret = -EOVERFLOW;
2880         btrfs_release_path(path);
2881         return ret;
2882 }
2883
2884 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2885                                struct btrfs_root *root,
2886                                struct btrfs_path *path,
2887                                struct inode_record *rec)
2888 {
2889         char *dir_name = "lost+found";
2890         char namebuf[BTRFS_NAME_LEN] = {0};
2891         u64 lost_found_ino;
2892         u32 mode = 0700;
2893         u8 type = 0;
2894         int namelen = 0;
2895         int name_recovered = 0;
2896         int type_recovered = 0;
2897         int ret = 0;
2898
2899         /*
2900          * Get file name and type first before these invalid inode ref
2901          * are deleted by remove_all_invalid_backref()
2902          */
2903         name_recovered = !find_file_name(rec, namebuf, &namelen);
2904         type_recovered = !find_file_type(rec, &type);
2905
2906         if (!name_recovered) {
2907                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2908                        rec->ino, rec->ino);
2909                 namelen = count_digits(rec->ino);
2910                 sprintf(namebuf, "%llu", rec->ino);
2911                 name_recovered = 1;
2912         }
2913         if (!type_recovered) {
2914                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2915                        rec->ino);
2916                 type = BTRFS_FT_REG_FILE;
2917                 type_recovered = 1;
2918         }
2919
2920         ret = reset_nlink(trans, root, path, rec);
2921         if (ret < 0) {
2922                 fprintf(stderr,
2923                         "Failed to reset nlink for inode %llu: %s\n",
2924                         rec->ino, strerror(-ret));
2925                 goto out;
2926         }
2927
2928         if (rec->found_link == 0) {
2929                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2930                 if (ret < 0)
2931                         goto out;
2932                 lost_found_ino++;
2933                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2934                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2935                                   mode);
2936                 if (ret < 0) {
2937                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2938                                 dir_name, strerror(-ret));
2939                         goto out;
2940                 }
2941                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2942                                      namebuf, namelen, type, NULL, 1);
2943                 /*
2944                  * Add ".INO" suffix several times to handle case where
2945                  * "FILENAME.INO" is already taken by another file.
2946                  */
2947                 while (ret == -EEXIST) {
2948                         /*
2949                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2950                          */
2951                         if (namelen + count_digits(rec->ino) + 1 >
2952                             BTRFS_NAME_LEN) {
2953                                 ret = -EFBIG;
2954                                 goto out;
2955                         }
2956                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2957                                  ".%llu", rec->ino);
2958                         namelen += count_digits(rec->ino) + 1;
2959                         ret = btrfs_add_link(trans, root, rec->ino,
2960                                              lost_found_ino, namebuf,
2961                                              namelen, type, NULL, 1);
2962                 }
2963                 if (ret < 0) {
2964                         fprintf(stderr,
2965                                 "Failed to link the inode %llu to %s dir: %s\n",
2966                                 rec->ino, dir_name, strerror(-ret));
2967                         goto out;
2968                 }
2969                 /*
2970                  * Just increase the found_link, don't actually add the
2971                  * backref. This will make things easier and this inode
2972                  * record will be freed after the repair is done.
2973                  * So fsck will not report problem about this inode.
2974                  */
2975                 rec->found_link++;
2976                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2977                        namelen, namebuf, dir_name);
2978         }
2979         printf("Fixed the nlink of inode %llu\n", rec->ino);
2980 out:
2981         /*
2982          * Clear the flag anyway, or we will loop forever for the same inode
2983          * as it will not be removed from the bad inode list and the dead loop
2984          * happens.
2985          */
2986         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2987         btrfs_release_path(path);
2988         return ret;
2989 }
2990
2991 /*
2992  * Check if there is any normal(reg or prealloc) file extent for given
2993  * ino.
2994  * This is used to determine the file type when neither its dir_index/item or
2995  * inode_item exists.
2996  *
2997  * This will *NOT* report error, if any error happens, just consider it does
2998  * not have any normal file extent.
2999  */
3000 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3001 {
3002         struct btrfs_path path;
3003         struct btrfs_key key;
3004         struct btrfs_key found_key;
3005         struct btrfs_file_extent_item *fi;
3006         u8 type;
3007         int ret = 0;
3008
3009         btrfs_init_path(&path);
3010         key.objectid = ino;
3011         key.type = BTRFS_EXTENT_DATA_KEY;
3012         key.offset = 0;
3013
3014         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3015         if (ret < 0) {
3016                 ret = 0;
3017                 goto out;
3018         }
3019         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3020                 ret = btrfs_next_leaf(root, &path);
3021                 if (ret) {
3022                         ret = 0;
3023                         goto out;
3024                 }
3025         }
3026         while (1) {
3027                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3028                                       path.slots[0]);
3029                 if (found_key.objectid != ino ||
3030                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3031                         break;
3032                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3033                                     struct btrfs_file_extent_item);
3034                 type = btrfs_file_extent_type(path.nodes[0], fi);
3035                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3036                         ret = 1;
3037                         goto out;
3038                 }
3039         }
3040 out:
3041         btrfs_release_path(&path);
3042         return ret;
3043 }
3044
3045 static u32 btrfs_type_to_imode(u8 type)
3046 {
3047         static u32 imode_by_btrfs_type[] = {
3048                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3049                 [BTRFS_FT_DIR]          = S_IFDIR,
3050                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3051                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3052                 [BTRFS_FT_FIFO]         = S_IFIFO,
3053                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3054                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3055         };
3056
3057         return imode_by_btrfs_type[(type)];
3058 }
3059
3060 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3061                                 struct btrfs_root *root,
3062                                 struct btrfs_path *path,
3063                                 struct inode_record *rec)
3064 {
3065         u8 filetype;
3066         u32 mode = 0700;
3067         int type_recovered = 0;
3068         int ret = 0;
3069
3070         printf("Trying to rebuild inode:%llu\n", rec->ino);
3071
3072         type_recovered = !find_file_type(rec, &filetype);
3073
3074         /*
3075          * Try to determine inode type if type not found.
3076          *
3077          * For found regular file extent, it must be FILE.
3078          * For found dir_item/index, it must be DIR.
3079          *
3080          * For undetermined one, use FILE as fallback.
3081          *
3082          * TODO:
3083          * 1. If found backref(inode_index/item is already handled) to it,
3084          *    it must be DIR.
3085          *    Need new inode-inode ref structure to allow search for that.
3086          */
3087         if (!type_recovered) {
3088                 if (rec->found_file_extent &&
3089                     find_normal_file_extent(root, rec->ino)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else if (rec->found_dir_item) {
3093                         type_recovered = 1;
3094                         filetype = BTRFS_FT_DIR;
3095                 } else if (!list_empty(&rec->orphan_extents)) {
3096                         type_recovered = 1;
3097                         filetype = BTRFS_FT_REG_FILE;
3098                 } else{
3099                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3100                                rec->ino);
3101                         type_recovered = 1;
3102                         filetype = BTRFS_FT_REG_FILE;
3103                 }
3104         }
3105
3106         ret = btrfs_new_inode(trans, root, rec->ino,
3107                               mode | btrfs_type_to_imode(filetype));
3108         if (ret < 0)
3109                 goto out;
3110
3111         /*
3112          * Here inode rebuild is done, we only rebuild the inode item,
3113          * don't repair the nlink(like move to lost+found).
3114          * That is the job of nlink repair.
3115          *
3116          * We just fill the record and return
3117          */
3118         rec->found_dir_item = 1;
3119         rec->imode = mode | btrfs_type_to_imode(filetype);
3120         rec->nlink = 0;
3121         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3122         /* Ensure the inode_nlinks repair function will be called */
3123         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3124 out:
3125         return ret;
3126 }
3127
3128 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3129                                       struct btrfs_root *root,
3130                                       struct btrfs_path *path,
3131                                       struct inode_record *rec)
3132 {
3133         struct orphan_data_extent *orphan;
3134         struct orphan_data_extent *tmp;
3135         int ret = 0;
3136
3137         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3138                 /*
3139                  * Check for conflicting file extents
3140                  *
3141                  * Here we don't know whether the extents is compressed or not,
3142                  * so we can only assume it not compressed nor data offset,
3143                  * and use its disk_len as extent length.
3144                  */
3145                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3146                                        orphan->offset, orphan->disk_len, 0);
3147                 btrfs_release_path(path);
3148                 if (ret < 0)
3149                         goto out;
3150                 if (!ret) {
3151                         fprintf(stderr,
3152                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3153                                 orphan->disk_bytenr, orphan->disk_len);
3154                         ret = btrfs_free_extent(trans,
3155                                         root->fs_info->extent_root,
3156                                         orphan->disk_bytenr, orphan->disk_len,
3157                                         0, root->objectid, orphan->objectid,
3158                                         orphan->offset);
3159                         if (ret < 0)
3160                                 goto out;
3161                 }
3162                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3163                                 orphan->offset, orphan->disk_bytenr,
3164                                 orphan->disk_len, orphan->disk_len);
3165                 if (ret < 0)
3166                         goto out;
3167
3168                 /* Update file size info */
3169                 rec->found_size += orphan->disk_len;
3170                 if (rec->found_size == rec->nbytes)
3171                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3172
3173                 /* Update the file extent hole info too */
3174                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3175                                            orphan->disk_len);
3176                 if (ret < 0)
3177                         goto out;
3178                 if (RB_EMPTY_ROOT(&rec->holes))
3179                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3180
3181                 list_del(&orphan->list);
3182                 free(orphan);
3183         }
3184         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3185 out:
3186         return ret;
3187 }
3188
3189 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3190                                         struct btrfs_root *root,
3191                                         struct btrfs_path *path,
3192                                         struct inode_record *rec)
3193 {
3194         struct rb_node *node;
3195         struct file_extent_hole *hole;
3196         int found = 0;
3197         int ret = 0;
3198
3199         node = rb_first(&rec->holes);
3200
3201         while (node) {
3202                 found = 1;
3203                 hole = rb_entry(node, struct file_extent_hole, node);
3204                 ret = btrfs_punch_hole(trans, root, rec->ino,
3205                                        hole->start, hole->len);
3206                 if (ret < 0)
3207                         goto out;
3208                 ret = del_file_extent_hole(&rec->holes, hole->start,
3209                                            hole->len);
3210                 if (ret < 0)
3211                         goto out;
3212                 if (RB_EMPTY_ROOT(&rec->holes))
3213                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3214                 node = rb_first(&rec->holes);
3215         }
3216         /* special case for a file losing all its file extent */
3217         if (!found) {
3218                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3219                                        round_up(rec->isize, root->sectorsize));
3220                 if (ret < 0)
3221                         goto out;
3222         }
3223         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3224                rec->ino, root->objectid);
3225 out:
3226         return ret;
3227 }
3228
3229 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3230 {
3231         struct btrfs_trans_handle *trans;
3232         struct btrfs_path path;
3233         int ret = 0;
3234
3235         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3236                              I_ERR_NO_ORPHAN_ITEM |
3237                              I_ERR_LINK_COUNT_WRONG |
3238                              I_ERR_NO_INODE_ITEM |
3239                              I_ERR_FILE_EXTENT_ORPHAN |
3240                              I_ERR_FILE_EXTENT_DISCOUNT|
3241                              I_ERR_FILE_NBYTES_WRONG)))
3242                 return rec->errors;
3243
3244         /*
3245          * For nlink repair, it may create a dir and add link, so
3246          * 2 for parent(256)'s dir_index and dir_item
3247          * 2 for lost+found dir's inode_item and inode_ref
3248          * 1 for the new inode_ref of the file
3249          * 2 for lost+found dir's dir_index and dir_item for the file
3250          */
3251         trans = btrfs_start_transaction(root, 7);
3252         if (IS_ERR(trans))
3253                 return PTR_ERR(trans);
3254
3255         btrfs_init_path(&path);
3256         if (rec->errors & I_ERR_NO_INODE_ITEM)
3257                 ret = repair_inode_no_item(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3259                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3261                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3263                 ret = repair_inode_isize(trans, root, &path, rec);
3264         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3265                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3266         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3267                 ret = repair_inode_nlinks(trans, root, &path, rec);
3268         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3269                 ret = repair_inode_nbytes(trans, root, &path, rec);
3270         btrfs_commit_transaction(trans, root);
3271         btrfs_release_path(&path);
3272         return ret;
3273 }
3274
3275 static int check_inode_recs(struct btrfs_root *root,
3276                             struct cache_tree *inode_cache)
3277 {
3278         struct cache_extent *cache;
3279         struct ptr_node *node;
3280         struct inode_record *rec;
3281         struct inode_backref *backref;
3282         int stage = 0;
3283         int ret = 0;
3284         int err = 0;
3285         u64 error = 0;
3286         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3287
3288         if (btrfs_root_refs(&root->root_item) == 0) {
3289                 if (!cache_tree_empty(inode_cache))
3290                         fprintf(stderr, "warning line %d\n", __LINE__);
3291                 return 0;
3292         }
3293
3294         /*
3295          * We need to repair backrefs first because we could change some of the
3296          * errors in the inode recs.
3297          *
3298          * We also need to go through and delete invalid backrefs first and then
3299          * add the correct ones second.  We do this because we may get EEXIST
3300          * when adding back the correct index because we hadn't yet deleted the
3301          * invalid index.
3302          *
3303          * For example, if we were missing a dir index then the directories
3304          * isize would be wrong, so if we fixed the isize to what we thought it
3305          * would be and then fixed the backref we'd still have a invalid fs, so
3306          * we need to add back the dir index and then check to see if the isize
3307          * is still wrong.
3308          */
3309         while (stage < 3) {
3310                 stage++;
3311                 if (stage == 3 && !err)
3312                         break;
3313
3314                 cache = search_cache_extent(inode_cache, 0);
3315                 while (repair && cache) {
3316                         node = container_of(cache, struct ptr_node, cache);
3317                         rec = node->data;
3318                         cache = next_cache_extent(cache);
3319
3320                         /* Need to free everything up and rescan */
3321                         if (stage == 3) {
3322                                 remove_cache_extent(inode_cache, &node->cache);
3323                                 free(node);
3324                                 free_inode_rec(rec);
3325                                 continue;
3326                         }
3327
3328                         if (list_empty(&rec->backrefs))
3329                                 continue;
3330
3331                         ret = repair_inode_backrefs(root, rec, inode_cache,
3332                                                     stage == 1);
3333                         if (ret < 0) {
3334                                 err = ret;
3335                                 stage = 2;
3336                                 break;
3337                         } if (ret > 0) {
3338                                 err = -EAGAIN;
3339                         }
3340                 }
3341         }
3342         if (err)
3343                 return err;
3344
3345         rec = get_inode_rec(inode_cache, root_dirid, 0);
3346         BUG_ON(IS_ERR(rec));
3347         if (rec) {
3348                 ret = check_root_dir(rec);
3349                 if (ret) {
3350                         fprintf(stderr, "root %llu root dir %llu error\n",
3351                                 (unsigned long long)root->root_key.objectid,
3352                                 (unsigned long long)root_dirid);
3353                         print_inode_error(root, rec);
3354                         error++;
3355                 }
3356         } else {
3357                 if (repair) {
3358                         struct btrfs_trans_handle *trans;
3359
3360                         trans = btrfs_start_transaction(root, 1);
3361                         if (IS_ERR(trans)) {
3362                                 err = PTR_ERR(trans);
3363                                 return err;
3364                         }
3365
3366                         fprintf(stderr,
3367                                 "root %llu missing its root dir, recreating\n",
3368                                 (unsigned long long)root->objectid);
3369
3370                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3371                         BUG_ON(ret);
3372
3373                         btrfs_commit_transaction(trans, root);
3374                         return -EAGAIN;
3375                 }
3376
3377                 fprintf(stderr, "root %llu root dir %llu not found\n",
3378                         (unsigned long long)root->root_key.objectid,
3379                         (unsigned long long)root_dirid);
3380         }
3381
3382         while (1) {
3383                 cache = search_cache_extent(inode_cache, 0);
3384                 if (!cache)
3385                         break;
3386                 node = container_of(cache, struct ptr_node, cache);
3387                 rec = node->data;
3388                 remove_cache_extent(inode_cache, &node->cache);
3389                 free(node);
3390                 if (rec->ino == root_dirid ||
3391                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3392                         free_inode_rec(rec);
3393                         continue;
3394                 }
3395
3396                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3397                         ret = check_orphan_item(root, rec->ino);
3398                         if (ret == 0)
3399                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3400                         if (can_free_inode_rec(rec)) {
3401                                 free_inode_rec(rec);
3402                                 continue;
3403                         }
3404                 }
3405
3406                 if (!rec->found_inode_item)
3407                         rec->errors |= I_ERR_NO_INODE_ITEM;
3408                 if (rec->found_link != rec->nlink)
3409                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3410                 if (repair) {
3411                         ret = try_repair_inode(root, rec);
3412                         if (ret == 0 && can_free_inode_rec(rec)) {
3413                                 free_inode_rec(rec);
3414                                 continue;
3415                         }
3416                         ret = 0;
3417                 }
3418
3419                 if (!(repair && ret == 0))
3420                         error++;
3421                 print_inode_error(root, rec);
3422                 list_for_each_entry(backref, &rec->backrefs, list) {
3423                         if (!backref->found_dir_item)
3424                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3425                         if (!backref->found_dir_index)
3426                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3427                         if (!backref->found_inode_ref)
3428                                 backref->errors |= REF_ERR_NO_INODE_REF;
3429                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3430                                 " namelen %u name %s filetype %d errors %x",
3431                                 (unsigned long long)backref->dir,
3432                                 (unsigned long long)backref->index,
3433                                 backref->namelen, backref->name,
3434                                 backref->filetype, backref->errors);
3435                         print_ref_error(backref->errors);
3436                 }
3437                 free_inode_rec(rec);
3438         }
3439         return (error > 0) ? -1 : 0;
3440 }
3441
3442 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3443                                         u64 objectid)
3444 {
3445         struct cache_extent *cache;
3446         struct root_record *rec = NULL;
3447         int ret;
3448
3449         cache = lookup_cache_extent(root_cache, objectid, 1);
3450         if (cache) {
3451                 rec = container_of(cache, struct root_record, cache);
3452         } else {
3453                 rec = calloc(1, sizeof(*rec));
3454                 if (!rec)
3455                         return ERR_PTR(-ENOMEM);
3456                 rec->objectid = objectid;
3457                 INIT_LIST_HEAD(&rec->backrefs);
3458                 rec->cache.start = objectid;
3459                 rec->cache.size = 1;
3460
3461                 ret = insert_cache_extent(root_cache, &rec->cache);
3462                 if (ret)
3463                         return ERR_PTR(-EEXIST);
3464         }
3465         return rec;
3466 }
3467
3468 static struct root_backref *get_root_backref(struct root_record *rec,
3469                                              u64 ref_root, u64 dir, u64 index,
3470                                              const char *name, int namelen)
3471 {
3472         struct root_backref *backref;
3473
3474         list_for_each_entry(backref, &rec->backrefs, list) {
3475                 if (backref->ref_root != ref_root || backref->dir != dir ||
3476                     backref->namelen != namelen)
3477                         continue;
3478                 if (memcmp(name, backref->name, namelen))
3479                         continue;
3480                 return backref;
3481         }
3482
3483         backref = calloc(1, sizeof(*backref) + namelen + 1);
3484         if (!backref)
3485                 return NULL;
3486         backref->ref_root = ref_root;
3487         backref->dir = dir;
3488         backref->index = index;
3489         backref->namelen = namelen;
3490         memcpy(backref->name, name, namelen);
3491         backref->name[namelen] = '\0';
3492         list_add_tail(&backref->list, &rec->backrefs);
3493         return backref;
3494 }
3495
3496 static void free_root_record(struct cache_extent *cache)
3497 {
3498         struct root_record *rec;
3499         struct root_backref *backref;
3500
3501         rec = container_of(cache, struct root_record, cache);
3502         while (!list_empty(&rec->backrefs)) {
3503                 backref = to_root_backref(rec->backrefs.next);
3504                 list_del(&backref->list);
3505                 free(backref);
3506         }
3507
3508         free(rec);
3509 }
3510
3511 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3512
3513 static int add_root_backref(struct cache_tree *root_cache,
3514                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3515                             const char *name, int namelen,
3516                             int item_type, int errors)
3517 {
3518         struct root_record *rec;
3519         struct root_backref *backref;
3520
3521         rec = get_root_rec(root_cache, root_id);
3522         BUG_ON(IS_ERR(rec));
3523         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3524         BUG_ON(!backref);
3525
3526         backref->errors |= errors;
3527
3528         if (item_type != BTRFS_DIR_ITEM_KEY) {
3529                 if (backref->found_dir_index || backref->found_back_ref ||
3530                     backref->found_forward_ref) {
3531                         if (backref->index != index)
3532                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3533                 } else {
3534                         backref->index = index;
3535                 }
3536         }
3537
3538         if (item_type == BTRFS_DIR_ITEM_KEY) {
3539                 if (backref->found_forward_ref)
3540                         rec->found_ref++;
3541                 backref->found_dir_item = 1;
3542         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3543                 backref->found_dir_index = 1;
3544         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3545                 if (backref->found_forward_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3547                 else if (backref->found_dir_item)
3548                         rec->found_ref++;
3549                 backref->found_forward_ref = 1;
3550         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3551                 if (backref->found_back_ref)
3552                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3553                 backref->found_back_ref = 1;
3554         } else {
3555                 BUG_ON(1);
3556         }
3557
3558         if (backref->found_forward_ref && backref->found_dir_item)
3559                 backref->reachable = 1;
3560         return 0;
3561 }
3562
3563 static int merge_root_recs(struct btrfs_root *root,
3564                            struct cache_tree *src_cache,
3565                            struct cache_tree *dst_cache)
3566 {
3567         struct cache_extent *cache;
3568         struct ptr_node *node;
3569         struct inode_record *rec;
3570         struct inode_backref *backref;
3571         int ret = 0;
3572
3573         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3574                 free_inode_recs_tree(src_cache);
3575                 return 0;
3576         }
3577
3578         while (1) {
3579                 cache = search_cache_extent(src_cache, 0);
3580                 if (!cache)
3581                         break;
3582                 node = container_of(cache, struct ptr_node, cache);
3583                 rec = node->data;
3584                 remove_cache_extent(src_cache, &node->cache);
3585                 free(node);
3586
3587                 ret = is_child_root(root, root->objectid, rec->ino);
3588                 if (ret < 0)
3589                         break;
3590                 else if (ret == 0)
3591                         goto skip;
3592
3593                 list_for_each_entry(backref, &rec->backrefs, list) {
3594                         BUG_ON(backref->found_inode_ref);
3595                         if (backref->found_dir_item)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3600                                         backref->errors);
3601                         if (backref->found_dir_index)
3602                                 add_root_backref(dst_cache, rec->ino,
3603                                         root->root_key.objectid, backref->dir,
3604                                         backref->index, backref->name,
3605                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3606                                         backref->errors);
3607                 }
3608 skip:
3609                 free_inode_rec(rec);
3610         }
3611         if (ret < 0)
3612                 return ret;
3613         return 0;
3614 }
3615
3616 static int check_root_refs(struct btrfs_root *root,
3617                            struct cache_tree *root_cache)
3618 {
3619         struct root_record *rec;
3620         struct root_record *ref_root;
3621         struct root_backref *backref;
3622         struct cache_extent *cache;
3623         int loop = 1;
3624         int ret;
3625         int error;
3626         int errors = 0;
3627
3628         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3629         BUG_ON(IS_ERR(rec));
3630         rec->found_ref = 1;
3631
3632         /* fixme: this can not detect circular references */
3633         while (loop) {
3634                 loop = 0;
3635                 cache = search_cache_extent(root_cache, 0);
3636                 while (1) {
3637                         if (!cache)
3638                                 break;
3639                         rec = container_of(cache, struct root_record, cache);
3640                         cache = next_cache_extent(cache);
3641
3642                         if (rec->found_ref == 0)
3643                                 continue;
3644
3645                         list_for_each_entry(backref, &rec->backrefs, list) {
3646                                 if (!backref->reachable)
3647                                         continue;
3648
3649                                 ref_root = get_root_rec(root_cache,
3650                                                         backref->ref_root);
3651                                 BUG_ON(IS_ERR(ref_root));
3652                                 if (ref_root->found_ref > 0)
3653                                         continue;
3654
3655                                 backref->reachable = 0;
3656                                 rec->found_ref--;
3657                                 if (rec->found_ref == 0)
3658                                         loop = 1;
3659                         }
3660                 }
3661         }
3662
3663         cache = search_cache_extent(root_cache, 0);
3664         while (1) {
3665                 if (!cache)
3666                         break;
3667                 rec = container_of(cache, struct root_record, cache);
3668                 cache = next_cache_extent(cache);
3669
3670                 if (rec->found_ref == 0 &&
3671                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3672                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3673                         ret = check_orphan_item(root->fs_info->tree_root,
3674                                                 rec->objectid);
3675                         if (ret == 0)
3676                                 continue;
3677
3678                         /*
3679                          * If we don't have a root item then we likely just have
3680                          * a dir item in a snapshot for this root but no actual
3681                          * ref key or anything so it's meaningless.
3682                          */
3683                         if (!rec->found_root_item)
3684                                 continue;
3685                         errors++;
3686                         fprintf(stderr, "fs tree %llu not referenced\n",
3687                                 (unsigned long long)rec->objectid);
3688                 }
3689
3690                 error = 0;
3691                 if (rec->found_ref > 0 && !rec->found_root_item)
3692                         error = 1;
3693                 list_for_each_entry(backref, &rec->backrefs, list) {
3694                         if (!backref->found_dir_item)
3695                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3696                         if (!backref->found_dir_index)
3697                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3698                         if (!backref->found_back_ref)
3699                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3700                         if (!backref->found_forward_ref)
3701                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3702                         if (backref->reachable && backref->errors)
3703                                 error = 1;
3704                 }
3705                 if (!error)
3706                         continue;
3707
3708                 errors++;
3709                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3710                         (unsigned long long)rec->objectid, rec->found_ref,
3711                          rec->found_root_item ? "" : "not found");
3712
3713                 list_for_each_entry(backref, &rec->backrefs, list) {
3714                         if (!backref->reachable)
3715                                 continue;
3716                         if (!backref->errors && rec->found_root_item)
3717                                 continue;
3718                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3719                                 " index %llu namelen %u name %s errors %x\n",
3720                                 (unsigned long long)backref->ref_root,
3721                                 (unsigned long long)backref->dir,
3722                                 (unsigned long long)backref->index,
3723                                 backref->namelen, backref->name,
3724                                 backref->errors);
3725                         print_ref_error(backref->errors);
3726                 }
3727         }
3728         return errors > 0 ? 1 : 0;
3729 }
3730
3731 static int process_root_ref(struct extent_buffer *eb, int slot,
3732                             struct btrfs_key *key,
3733                             struct cache_tree *root_cache)
3734 {
3735         u64 dirid;
3736         u64 index;
3737         u32 len;
3738         u32 name_len;
3739         struct btrfs_root_ref *ref;
3740         char namebuf[BTRFS_NAME_LEN];
3741         int error;
3742
3743         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3744
3745         dirid = btrfs_root_ref_dirid(eb, ref);
3746         index = btrfs_root_ref_sequence(eb, ref);
3747         name_len = btrfs_root_ref_name_len(eb, ref);
3748
3749         if (name_len <= BTRFS_NAME_LEN) {
3750                 len = name_len;
3751                 error = 0;
3752         } else {
3753                 len = BTRFS_NAME_LEN;
3754                 error = REF_ERR_NAME_TOO_LONG;
3755         }
3756         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3757
3758         if (key->type == BTRFS_ROOT_REF_KEY) {
3759                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3760                                  index, namebuf, len, key->type, error);
3761         } else {
3762                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3763                                  index, namebuf, len, key->type, error);
3764         }
3765         return 0;
3766 }
3767
3768 static void free_corrupt_block(struct cache_extent *cache)
3769 {
3770         struct btrfs_corrupt_block *corrupt;
3771
3772         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3773         free(corrupt);
3774 }
3775
3776 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3777
3778 /*
3779  * Repair the btree of the given root.
3780  *
3781  * The fix is to remove the node key in corrupt_blocks cache_tree.
3782  * and rebalance the tree.
3783  * After the fix, the btree should be writeable.
3784  */
3785 static int repair_btree(struct btrfs_root *root,
3786                         struct cache_tree *corrupt_blocks)
3787 {
3788         struct btrfs_trans_handle *trans;
3789         struct btrfs_path path;
3790         struct btrfs_corrupt_block *corrupt;
3791         struct cache_extent *cache;
3792         struct btrfs_key key;
3793         u64 offset;
3794         int level;
3795         int ret = 0;
3796
3797         if (cache_tree_empty(corrupt_blocks))
3798                 return 0;
3799
3800         trans = btrfs_start_transaction(root, 1);
3801         if (IS_ERR(trans)) {
3802                 ret = PTR_ERR(trans);
3803                 fprintf(stderr, "Error starting transaction: %s\n",
3804                         strerror(-ret));
3805                 return ret;
3806         }
3807         btrfs_init_path(&path);
3808         cache = first_cache_extent(corrupt_blocks);
3809         while (cache) {
3810                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3811                                        cache);
3812                 level = corrupt->level;
3813                 path.lowest_level = level;
3814                 key.objectid = corrupt->key.objectid;
3815                 key.type = corrupt->key.type;
3816                 key.offset = corrupt->key.offset;
3817
3818                 /*
3819                  * Here we don't want to do any tree balance, since it may
3820                  * cause a balance with corrupted brother leaf/node,
3821                  * so ins_len set to 0 here.
3822                  * Balance will be done after all corrupt node/leaf is deleted.
3823                  */
3824                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3825                 if (ret < 0)
3826                         goto out;
3827                 offset = btrfs_node_blockptr(path.nodes[level],
3828                                              path.slots[level]);
3829
3830                 /* Remove the ptr */
3831                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3832                 if (ret < 0)
3833                         goto out;
3834                 /*
3835                  * Remove the corresponding extent
3836                  * return value is not concerned.
3837                  */
3838                 btrfs_release_path(&path);
3839                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3840                                         0, root->root_key.objectid,
3841                                         level - 1, 0);
3842                 cache = next_cache_extent(cache);
3843         }
3844
3845         /* Balance the btree using btrfs_search_slot() */
3846         cache = first_cache_extent(corrupt_blocks);
3847         while (cache) {
3848                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3849                                        cache);
3850                 memcpy(&key, &corrupt->key, sizeof(key));
3851                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3852                 if (ret < 0)
3853                         goto out;
3854                 /* return will always >0 since it won't find the item */
3855                 ret = 0;
3856                 btrfs_release_path(&path);
3857                 cache = next_cache_extent(cache);
3858         }
3859 out:
3860         btrfs_commit_transaction(trans, root);
3861         btrfs_release_path(&path);
3862         return ret;
3863 }
3864
3865 static int check_fs_root(struct btrfs_root *root,
3866                          struct cache_tree *root_cache,
3867                          struct walk_control *wc)
3868 {
3869         int ret = 0;
3870         int err = 0;
3871         int wret;
3872         int level;
3873         struct btrfs_path path;
3874         struct shared_node root_node;
3875         struct root_record *rec;
3876         struct btrfs_root_item *root_item = &root->root_item;
3877         struct cache_tree corrupt_blocks;
3878         struct orphan_data_extent *orphan;
3879         struct orphan_data_extent *tmp;
3880         enum btrfs_tree_block_status status;
3881         struct node_refs nrefs;
3882
3883         /*
3884          * Reuse the corrupt_block cache tree to record corrupted tree block
3885          *
3886          * Unlike the usage in extent tree check, here we do it in a per
3887          * fs/subvol tree base.
3888          */
3889         cache_tree_init(&corrupt_blocks);
3890         root->fs_info->corrupt_blocks = &corrupt_blocks;
3891
3892         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3893                 rec = get_root_rec(root_cache, root->root_key.objectid);
3894                 BUG_ON(IS_ERR(rec));
3895                 if (btrfs_root_refs(root_item) > 0)
3896                         rec->found_root_item = 1;
3897         }
3898
3899         btrfs_init_path(&path);
3900         memset(&root_node, 0, sizeof(root_node));
3901         cache_tree_init(&root_node.root_cache);
3902         cache_tree_init(&root_node.inode_cache);
3903         memset(&nrefs, 0, sizeof(nrefs));
3904
3905         /* Move the orphan extent record to corresponding inode_record */
3906         list_for_each_entry_safe(orphan, tmp,
3907                                  &root->orphan_data_extents, list) {
3908                 struct inode_record *inode;
3909
3910                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3911                                       1);
3912                 BUG_ON(IS_ERR(inode));
3913                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3914                 list_move(&orphan->list, &inode->orphan_extents);
3915         }
3916
3917         level = btrfs_header_level(root->node);
3918         memset(wc->nodes, 0, sizeof(wc->nodes));
3919         wc->nodes[level] = &root_node;
3920         wc->active_node = level;
3921         wc->root_level = level;
3922
3923         /* We may not have checked the root block, lets do that now */
3924         if (btrfs_is_leaf(root->node))
3925                 status = btrfs_check_leaf(root, NULL, root->node);
3926         else
3927                 status = btrfs_check_node(root, NULL, root->node);
3928         if (status != BTRFS_TREE_BLOCK_CLEAN)
3929                 return -EIO;
3930
3931         if (btrfs_root_refs(root_item) > 0 ||
3932             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3933                 path.nodes[level] = root->node;
3934                 extent_buffer_get(root->node);
3935                 path.slots[level] = 0;
3936         } else {
3937                 struct btrfs_key key;
3938                 struct btrfs_disk_key found_key;
3939
3940                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3941                 level = root_item->drop_level;
3942                 path.lowest_level = level;
3943                 if (level > btrfs_header_level(root->node) ||
3944                     level >= BTRFS_MAX_LEVEL) {
3945                         error("ignoring invalid drop level: %u", level);
3946                         goto skip_walking;
3947                 }
3948                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3949                 if (wret < 0)
3950                         goto skip_walking;
3951                 btrfs_node_key(path.nodes[level], &found_key,
3952                                 path.slots[level]);
3953                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3954                                         sizeof(found_key)));
3955         }
3956
3957         while (1) {
3958                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963
3964                 wret = walk_up_tree(root, &path, wc, &level);
3965                 if (wret < 0)
3966                         ret = wret;
3967                 if (wret != 0)
3968                         break;
3969         }
3970 skip_walking:
3971         btrfs_release_path(&path);
3972
3973         if (!cache_tree_empty(&corrupt_blocks)) {
3974                 struct cache_extent *cache;
3975                 struct btrfs_corrupt_block *corrupt;
3976
3977                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3978                        root->root_key.objectid);
3979                 cache = first_cache_extent(&corrupt_blocks);
3980                 while (cache) {
3981                         corrupt = container_of(cache,
3982                                                struct btrfs_corrupt_block,
3983                                                cache);
3984                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3985                                cache->start, corrupt->level,
3986                                corrupt->key.objectid, corrupt->key.type,
3987                                corrupt->key.offset);
3988                         cache = next_cache_extent(cache);
3989                 }
3990                 if (repair) {
3991                         printf("Try to repair the btree for root %llu\n",
3992                                root->root_key.objectid);
3993                         ret = repair_btree(root, &corrupt_blocks);
3994                         if (ret < 0)
3995                                 fprintf(stderr, "Failed to repair btree: %s\n",
3996                                         strerror(-ret));
3997                         if (!ret)
3998                                 printf("Btree for root %llu is fixed\n",
3999                                        root->root_key.objectid);
4000                 }
4001         }
4002
4003         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4004         if (err < 0)
4005                 ret = err;
4006
4007         if (root_node.current) {
4008                 root_node.current->checked = 1;
4009                 maybe_free_inode_rec(&root_node.inode_cache,
4010                                 root_node.current);
4011         }
4012
4013         err = check_inode_recs(root, &root_node.inode_cache);
4014         if (!ret)
4015                 ret = err;
4016
4017         free_corrupt_blocks_tree(&corrupt_blocks);
4018         root->fs_info->corrupt_blocks = NULL;
4019         free_orphan_data_extents(&root->orphan_data_extents);
4020         return ret;
4021 }
4022
4023 static int fs_root_objectid(u64 objectid)
4024 {
4025         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4026             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4027                 return 1;
4028         return is_fstree(objectid);
4029 }
4030
4031 static int check_fs_roots(struct btrfs_root *root,
4032                           struct cache_tree *root_cache)
4033 {
4034         struct btrfs_path path;
4035         struct btrfs_key key;
4036         struct walk_control wc;
4037         struct extent_buffer *leaf, *tree_node;
4038         struct btrfs_root *tmp_root;
4039         struct btrfs_root *tree_root = root->fs_info->tree_root;
4040         int ret;
4041         int err = 0;
4042
4043         if (ctx.progress_enabled) {
4044                 ctx.tp = TASK_FS_ROOTS;
4045                 task_start(ctx.info);
4046         }
4047
4048         /*
4049          * Just in case we made any changes to the extent tree that weren't
4050          * reflected into the free space cache yet.
4051          */
4052         if (repair)
4053                 reset_cached_block_groups(root->fs_info);
4054         memset(&wc, 0, sizeof(wc));
4055         cache_tree_init(&wc.shared);
4056         btrfs_init_path(&path);
4057
4058 again:
4059         key.offset = 0;
4060         key.objectid = 0;
4061         key.type = BTRFS_ROOT_ITEM_KEY;
4062         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4063         if (ret < 0) {
4064                 err = 1;
4065                 goto out;
4066         }
4067         tree_node = tree_root->node;
4068         while (1) {
4069                 if (tree_node != tree_root->node) {
4070                         free_root_recs_tree(root_cache);
4071                         btrfs_release_path(&path);
4072                         goto again;
4073                 }
4074                 leaf = path.nodes[0];
4075                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4076                         ret = btrfs_next_leaf(tree_root, &path);
4077                         if (ret) {
4078                                 if (ret < 0)
4079                                         err = 1;
4080                                 break;
4081                         }
4082                         leaf = path.nodes[0];
4083                 }
4084                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4085                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4086                     fs_root_objectid(key.objectid)) {
4087                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4088                                 tmp_root = btrfs_read_fs_root_no_cache(
4089                                                 root->fs_info, &key);
4090                         } else {
4091                                 key.offset = (u64)-1;
4092                                 tmp_root = btrfs_read_fs_root(
4093                                                 root->fs_info, &key);
4094                         }
4095                         if (IS_ERR(tmp_root)) {
4096                                 err = 1;
4097                                 goto next;
4098                         }
4099                         ret = check_fs_root(tmp_root, root_cache, &wc);
4100                         if (ret == -EAGAIN) {
4101                                 free_root_recs_tree(root_cache);
4102                                 btrfs_release_path(&path);
4103                                 goto again;
4104                         }
4105                         if (ret)
4106                                 err = 1;
4107                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4108                                 btrfs_free_fs_root(tmp_root);
4109                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4110                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4111                         process_root_ref(leaf, path.slots[0], &key,
4112                                          root_cache);
4113                 }
4114 next:
4115                 path.slots[0]++;
4116         }
4117 out:
4118         btrfs_release_path(&path);
4119         if (err)
4120                 free_extent_cache_tree(&wc.shared);
4121         if (!cache_tree_empty(&wc.shared))
4122                 fprintf(stderr, "warning line %d\n", __LINE__);
4123
4124         task_stop(ctx.info);
4125
4126         return err;
4127 }
4128
4129 /*
4130  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4131  * INODE_REF/INODE_EXTREF match.
4132  *
4133  * @root:       the root of the fs/file tree
4134  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4135  * @key:        the key of the DIR_ITEM/DIR_INDEX
4136  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4137  *              distinguish root_dir between normal dir/file
4138  * @name:       the name in the INODE_REF/INODE_EXTREF
4139  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4140  * @mode:       the st_mode of INODE_ITEM
4141  *
4142  * Return 0 if no error occurred.
4143  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4144  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4145  * dir/file.
4146  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4147  * not match for normal dir/file.
4148  */
4149 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4150                          struct btrfs_key *key, u64 index, char *name,
4151                          u32 namelen, u32 mode)
4152 {
4153         struct btrfs_path path;
4154         struct extent_buffer *node;
4155         struct btrfs_dir_item *di;
4156         struct btrfs_key location;
4157         char namebuf[BTRFS_NAME_LEN] = {0};
4158         u32 total;
4159         u32 cur = 0;
4160         u32 len;
4161         u32 name_len;
4162         u32 data_len;
4163         u8 filetype;
4164         int slot;
4165         int ret;
4166
4167         btrfs_init_path(&path);
4168         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4169         if (ret < 0) {
4170                 ret = DIR_ITEM_MISSING;
4171                 goto out;
4172         }
4173
4174         /* Process root dir and goto out*/
4175         if (index == 0) {
4176                 if (ret == 0) {
4177                         ret = ROOT_DIR_ERROR;
4178                         error(
4179                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4180                                 root->objectid,
4181                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4182                                         "REF" : "EXTREF",
4183                                 ref_key->objectid, ref_key->offset,
4184                                 key->type == BTRFS_DIR_ITEM_KEY ?
4185                                         "DIR_ITEM" : "DIR_INDEX");
4186                 } else {
4187                         ret = 0;
4188                 }
4189
4190                 goto out;
4191         }
4192
4193         /* Process normal file/dir */
4194         if (ret > 0) {
4195                 ret = DIR_ITEM_MISSING;
4196                 error(
4197                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4198                         root->objectid,
4199                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4200                         ref_key->objectid, ref_key->offset,
4201                         key->type == BTRFS_DIR_ITEM_KEY ?
4202                                 "DIR_ITEM" : "DIR_INDEX",
4203                         key->objectid, key->offset, namelen, name,
4204                         imode_to_type(mode));
4205                 goto out;
4206         }
4207
4208         /* Check whether inode_id/filetype/name match */
4209         node = path.nodes[0];
4210         slot = path.slots[0];
4211         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4212         total = btrfs_item_size_nr(node, slot);
4213         while (cur < total) {
4214                 ret = DIR_ITEM_MISMATCH;
4215                 name_len = btrfs_dir_name_len(node, di);
4216                 data_len = btrfs_dir_data_len(node, di);
4217
4218                 btrfs_dir_item_key_to_cpu(node, di, &location);
4219                 if (location.objectid != ref_key->objectid ||
4220                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4221                     location.offset != 0)
4222                         goto next;
4223
4224                 filetype = btrfs_dir_type(node, di);
4225                 if (imode_to_type(mode) != filetype)
4226                         goto next;
4227
4228                 if (name_len <= BTRFS_NAME_LEN) {
4229                         len = name_len;
4230                 } else {
4231                         len = BTRFS_NAME_LEN;
4232                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4233                         root->objectid,
4234                         key->type == BTRFS_DIR_ITEM_KEY ?
4235                         "DIR_ITEM" : "DIR_INDEX",
4236                         key->objectid, key->offset, name_len);
4237                 }
4238                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4239                 if (len != namelen || strncmp(namebuf, name, len))
4240                         goto next;
4241
4242                 ret = 0;
4243                 goto out;
4244 next:
4245                 len = sizeof(*di) + name_len + data_len;
4246                 di = (struct btrfs_dir_item *)((char *)di + len);
4247                 cur += len;
4248         }
4249         if (ret == DIR_ITEM_MISMATCH)
4250                 error(
4251                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4252                         root->objectid,
4253                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4254                         ref_key->objectid, ref_key->offset,
4255                         key->type == BTRFS_DIR_ITEM_KEY ?
4256                                 "DIR_ITEM" : "DIR_INDEX",
4257                         key->objectid, key->offset, namelen, name,
4258                         imode_to_type(mode));
4259 out:
4260         btrfs_release_path(&path);
4261         return ret;
4262 }
4263
4264 /*
4265  * Traverse the given INODE_REF and call find_dir_item() to find related
4266  * DIR_ITEM/DIR_INDEX.
4267  *
4268  * @root:       the root of the fs/file tree
4269  * @ref_key:    the key of the INODE_REF
4270  * @refs:       the count of INODE_REF
4271  * @mode:       the st_mode of INODE_ITEM
4272  *
4273  * Return 0 if no error occurred.
4274  */
4275 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4276                            struct extent_buffer *node, int slot, u64 *refs,
4277                            int mode)
4278 {
4279         struct btrfs_key key;
4280         struct btrfs_inode_ref *ref;
4281         char namebuf[BTRFS_NAME_LEN] = {0};
4282         u32 total;
4283         u32 cur = 0;
4284         u32 len;
4285         u32 name_len;
4286         u64 index;
4287         int ret, err = 0;
4288
4289         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4290         total = btrfs_item_size_nr(node, slot);
4291
4292 next:
4293         /* Update inode ref count */
4294         (*refs)++;
4295
4296         index = btrfs_inode_ref_index(node, ref);
4297         name_len = btrfs_inode_ref_name_len(node, ref);
4298         if (name_len <= BTRFS_NAME_LEN) {
4299                 len = name_len;
4300         } else {
4301                 len = BTRFS_NAME_LEN;
4302                 warning("root %llu INODE_REF[%llu %llu] name too long",
4303                         root->objectid, ref_key->objectid, ref_key->offset);
4304         }
4305
4306         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4307
4308         /* Check root dir ref name */
4309         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4310                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4311                       root->objectid, ref_key->objectid, ref_key->offset,
4312                       namebuf);
4313                 err |= ROOT_DIR_ERROR;
4314         }
4315
4316         /* Find related DIR_INDEX */
4317         key.objectid = ref_key->offset;
4318         key.type = BTRFS_DIR_INDEX_KEY;
4319         key.offset = index;
4320         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4321         err |= ret;
4322
4323         /* Find related dir_item */
4324         key.objectid = ref_key->offset;
4325         key.type = BTRFS_DIR_ITEM_KEY;
4326         key.offset = btrfs_name_hash(namebuf, len);
4327         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4328         err |= ret;
4329
4330         len = sizeof(*ref) + name_len;
4331         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4332         cur += len;
4333         if (cur < total)
4334                 goto next;
4335
4336         return err;
4337 }
4338
4339 /*
4340  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4341  * DIR_ITEM/DIR_INDEX.
4342  *
4343  * @root:       the root of the fs/file tree
4344  * @ref_key:    the key of the INODE_EXTREF
4345  * @refs:       the count of INODE_EXTREF
4346  * @mode:       the st_mode of INODE_ITEM
4347  *
4348  * Return 0 if no error occurred.
4349  */
4350 static int check_inode_extref(struct btrfs_root *root,
4351                               struct btrfs_key *ref_key,
4352                               struct extent_buffer *node, int slot, u64 *refs,
4353                               int mode)
4354 {
4355         struct btrfs_key key;
4356         struct btrfs_inode_extref *extref;
4357         char namebuf[BTRFS_NAME_LEN] = {0};
4358         u32 total;
4359         u32 cur = 0;
4360         u32 len;
4361         u32 name_len;
4362         u64 index;
4363         u64 parent;
4364         int ret;
4365         int err = 0;
4366
4367         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4368         total = btrfs_item_size_nr(node, slot);
4369
4370 next:
4371         /* update inode ref count */
4372         (*refs)++;
4373         name_len = btrfs_inode_extref_name_len(node, extref);
4374         index = btrfs_inode_extref_index(node, extref);
4375         parent = btrfs_inode_extref_parent(node, extref);
4376         if (name_len <= BTRFS_NAME_LEN) {
4377                 len = name_len;
4378         } else {
4379                 len = BTRFS_NAME_LEN;
4380                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4381                         root->objectid, ref_key->objectid, ref_key->offset);
4382         }
4383         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4384
4385         /* Check root dir ref name */
4386         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4387                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4388                       root->objectid, ref_key->objectid, ref_key->offset,
4389                       namebuf);
4390                 err |= ROOT_DIR_ERROR;
4391         }
4392
4393         /* find related dir_index */
4394         key.objectid = parent;
4395         key.type = BTRFS_DIR_INDEX_KEY;
4396         key.offset = index;
4397         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4398         err |= ret;
4399
4400         /* find related dir_item */
4401         key.objectid = parent;
4402         key.type = BTRFS_DIR_ITEM_KEY;
4403         key.offset = btrfs_name_hash(namebuf, len);
4404         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4405         err |= ret;
4406
4407         len = sizeof(*extref) + name_len;
4408         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4409         cur += len;
4410
4411         if (cur < total)
4412                 goto next;
4413
4414         return err;
4415 }
4416
4417 /*
4418  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4419  * DIR_ITEM/DIR_INDEX match.
4420  *
4421  * @root:       the root of the fs/file tree
4422  * @key:        the key of the INODE_REF/INODE_EXTREF
4423  * @name:       the name in the INODE_REF/INODE_EXTREF
4424  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4425  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4426  * to (u64)-1
4427  * @ext_ref:    the EXTENDED_IREF feature
4428  *
4429  * Return 0 if no error occurred.
4430  * Return >0 for error bitmap
4431  */
4432 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4433                           char *name, int namelen, u64 index,
4434                           unsigned int ext_ref)
4435 {
4436         struct btrfs_path path;
4437         struct btrfs_inode_ref *ref;
4438         struct btrfs_inode_extref *extref;
4439         struct extent_buffer *node;
4440         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4441         u32 total;
4442         u32 cur = 0;
4443         u32 len;
4444         u32 ref_namelen;
4445         u64 ref_index;
4446         u64 parent;
4447         u64 dir_id;
4448         int slot;
4449         int ret;
4450
4451         btrfs_init_path(&path);
4452         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4453         if (ret) {
4454                 ret = INODE_REF_MISSING;
4455                 goto extref;
4456         }
4457
4458         node = path.nodes[0];
4459         slot = path.slots[0];
4460
4461         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4462         total = btrfs_item_size_nr(node, slot);
4463
4464         /* Iterate all entry of INODE_REF */
4465         while (cur < total) {
4466                 ret = INODE_REF_MISSING;
4467
4468                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4469                 ref_index = btrfs_inode_ref_index(node, ref);
4470                 if (index != (u64)-1 && index != ref_index)
4471                         goto next_ref;
4472
4473                 if (ref_namelen <= BTRFS_NAME_LEN) {
4474                         len = ref_namelen;
4475                 } else {
4476                         len = BTRFS_NAME_LEN;
4477                         warning("root %llu INODE %s[%llu %llu] name too long",
4478                                 root->objectid,
4479                                 key->type == BTRFS_INODE_REF_KEY ?
4480                                         "REF" : "EXTREF",
4481                                 key->objectid, key->offset);
4482                 }
4483                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4484                                    len);
4485
4486                 if (len != namelen || strncmp(ref_namebuf, name, len))
4487                         goto next_ref;
4488
4489                 ret = 0;
4490                 goto out;
4491 next_ref:
4492                 len = sizeof(*ref) + ref_namelen;
4493                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4494                 cur += len;
4495         }
4496
4497 extref:
4498         /* Skip if not support EXTENDED_IREF feature */
4499         if (!ext_ref)
4500                 goto out;
4501
4502         btrfs_release_path(&path);
4503         btrfs_init_path(&path);
4504
4505         dir_id = key->offset;
4506         key->type = BTRFS_INODE_EXTREF_KEY;
4507         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4508
4509         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4510         if (ret) {
4511                 ret = INODE_REF_MISSING;
4512                 goto out;
4513         }
4514
4515         node = path.nodes[0];
4516         slot = path.slots[0];
4517
4518         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4519         cur = 0;
4520         total = btrfs_item_size_nr(node, slot);
4521
4522         /* Iterate all entry of INODE_EXTREF */
4523         while (cur < total) {
4524                 ret = INODE_REF_MISSING;
4525
4526                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4527                 ref_index = btrfs_inode_extref_index(node, extref);
4528                 parent = btrfs_inode_extref_parent(node, extref);
4529                 if (index != (u64)-1 && index != ref_index)
4530                         goto next_extref;
4531
4532                 if (parent != dir_id)
4533                         goto next_extref;
4534
4535                 if (ref_namelen <= BTRFS_NAME_LEN) {
4536                         len = ref_namelen;
4537                 } else {
4538                         len = BTRFS_NAME_LEN;
4539                         warning("root %llu INODE %s[%llu %llu] name too long",
4540                                 root->objectid,
4541                                 key->type == BTRFS_INODE_REF_KEY ?
4542                                         "REF" : "EXTREF",
4543                                 key->objectid, key->offset);
4544                 }
4545                 read_extent_buffer(node, ref_namebuf,
4546                                    (unsigned long)(extref + 1), len);
4547
4548                 if (len != namelen || strncmp(ref_namebuf, name, len))
4549                         goto next_extref;
4550
4551                 ret = 0;
4552                 goto out;
4553
4554 next_extref:
4555                 len = sizeof(*extref) + ref_namelen;
4556                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4557                 cur += len;
4558
4559         }
4560 out:
4561         btrfs_release_path(&path);
4562         return ret;
4563 }
4564
4565 /*
4566  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4567  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4568  *
4569  * @root:       the root of the fs/file tree
4570  * @key:        the key of the INODE_REF/INODE_EXTREF
4571  * @size:       the st_size of the INODE_ITEM
4572  * @ext_ref:    the EXTENDED_IREF feature
4573  *
4574  * Return 0 if no error occurred.
4575  */
4576 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4577                           struct extent_buffer *node, int slot, u64 *size,
4578                           unsigned int ext_ref)
4579 {
4580         struct btrfs_dir_item *di;
4581         struct btrfs_inode_item *ii;
4582         struct btrfs_path path;
4583         struct btrfs_key location;
4584         char namebuf[BTRFS_NAME_LEN] = {0};
4585         u32 total;
4586         u32 cur = 0;
4587         u32 len;
4588         u32 name_len;
4589         u32 data_len;
4590         u8 filetype;
4591         u32 mode;
4592         u64 index;
4593         int ret;
4594         int err = 0;
4595
4596         /*
4597          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4598          * ignore index check.
4599          */
4600         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4601
4602         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4603         total = btrfs_item_size_nr(node, slot);
4604
4605         while (cur < total) {
4606                 data_len = btrfs_dir_data_len(node, di);
4607                 if (data_len)
4608                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4609                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4610                               "DIR_ITEM" : "DIR_INDEX",
4611                               key->objectid, key->offset, data_len);
4612
4613                 name_len = btrfs_dir_name_len(node, di);
4614                 if (name_len <= BTRFS_NAME_LEN) {
4615                         len = name_len;
4616                 } else {
4617                         len = BTRFS_NAME_LEN;
4618                         warning("root %llu %s[%llu %llu] name too long",
4619                                 root->objectid,
4620                                 key->type == BTRFS_DIR_ITEM_KEY ?
4621                                 "DIR_ITEM" : "DIR_INDEX",
4622                                 key->objectid, key->offset);
4623                 }
4624                 (*size) += name_len;
4625
4626                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4627                 filetype = btrfs_dir_type(node, di);
4628
4629                 btrfs_init_path(&path);
4630                 btrfs_dir_item_key_to_cpu(node, di, &location);
4631
4632                 /* Ignore related ROOT_ITEM check */
4633                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4634                         goto next;
4635
4636                 /* Check relative INODE_ITEM(existence/filetype) */
4637                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4638                 if (ret) {
4639                         err |= INODE_ITEM_MISSING;
4640                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4641                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4642                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4643                               key->offset, location.objectid, name_len,
4644                               namebuf, filetype);
4645                         goto next;
4646                 }
4647
4648                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4649                                     struct btrfs_inode_item);
4650                 mode = btrfs_inode_mode(path.nodes[0], ii);
4651
4652                 if (imode_to_type(mode) != filetype) {
4653                         err |= INODE_ITEM_MISMATCH;
4654                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4655                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4656                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4657                               key->offset, name_len, namebuf, filetype);
4658                 }
4659
4660                 /* Check relative INODE_REF/INODE_EXTREF */
4661                 location.type = BTRFS_INODE_REF_KEY;
4662                 location.offset = key->objectid;
4663                 ret = find_inode_ref(root, &location, namebuf, len,
4664                                        index, ext_ref);
4665                 err |= ret;
4666                 if (ret & INODE_REF_MISSING)
4667                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4668                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4669                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4670                               key->offset, name_len, namebuf, filetype);
4671
4672 next:
4673                 btrfs_release_path(&path);
4674                 len = sizeof(*di) + name_len + data_len;
4675                 di = (struct btrfs_dir_item *)((char *)di + len);
4676                 cur += len;
4677
4678                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4679                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4680                               root->objectid, key->objectid, key->offset);
4681                         break;
4682                 }
4683         }
4684
4685         return err;
4686 }
4687
4688 /*
4689  * Check file extent datasum/hole, update the size of the file extents,
4690  * check and update the last offset of the file extent.
4691  *
4692  * @root:       the root of fs/file tree.
4693  * @fkey:       the key of the file extent.
4694  * @nodatasum:  INODE_NODATASUM feature.
4695  * @size:       the sum of all EXTENT_DATA items size for this inode.
4696  * @end:        the offset of the last extent.
4697  *
4698  * Return 0 if no error occurred.
4699  */
4700 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4701                              struct extent_buffer *node, int slot,
4702                              unsigned int nodatasum, u64 *size, u64 *end)
4703 {
4704         struct btrfs_file_extent_item *fi;
4705         u64 disk_bytenr;
4706         u64 disk_num_bytes;
4707         u64 extent_num_bytes;
4708         u64 extent_offset;
4709         u64 csum_found;         /* In byte size, sectorsize aligned */
4710         u64 search_start;       /* Logical range start we search for csum */
4711         u64 search_len;         /* Logical range len we search for csum */
4712         unsigned int extent_type;
4713         unsigned int is_hole;
4714         int compressed = 0;
4715         int ret;
4716         int err = 0;
4717
4718         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4719
4720         /* Check inline extent */
4721         extent_type = btrfs_file_extent_type(node, fi);
4722         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4723                 struct btrfs_item *e = btrfs_item_nr(slot);
4724                 u32 item_inline_len;
4725
4726                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4727                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4728                 compressed = btrfs_file_extent_compression(node, fi);
4729                 if (extent_num_bytes == 0) {
4730                         error(
4731                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4732                                 root->objectid, fkey->objectid, fkey->offset);
4733                         err |= FILE_EXTENT_ERROR;
4734                 }
4735                 if (!compressed && extent_num_bytes != item_inline_len) {
4736                         error(
4737                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4738                                 root->objectid, fkey->objectid, fkey->offset,
4739                                 extent_num_bytes, item_inline_len);
4740                         err |= FILE_EXTENT_ERROR;
4741                 }
4742                 *size += extent_num_bytes;
4743                 return err;
4744         }
4745
4746         /* Check extent type */
4747         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4748                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4749                 err |= FILE_EXTENT_ERROR;
4750                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752                 return err;
4753         }
4754
4755         /* Check REG_EXTENT/PREALLOC_EXTENT */
4756         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4757         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4758         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4759         extent_offset = btrfs_file_extent_offset(node, fi);
4760         compressed = btrfs_file_extent_compression(node, fi);
4761         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4762
4763         /*
4764          * Check EXTENT_DATA csum
4765          *
4766          * For plain (uncompressed) extent, we should only check the range
4767          * we're referring to, as it's possible that part of prealloc extent
4768          * has been written, and has csum:
4769          *
4770          * |<--- Original large preallocated extent A ---->|
4771          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4772          *      No csum                         Has csum
4773          *
4774          * For compressed extent, we should check the whole range.
4775          */
4776         if (!compressed) {
4777                 search_start = disk_bytenr + extent_offset;
4778                 search_len = extent_num_bytes;
4779         } else {
4780                 search_start = disk_bytenr;
4781                 search_len = disk_num_bytes;
4782         }
4783         ret = count_csum_range(root, search_start, search_len, &csum_found);
4784         if (csum_found > 0 && nodatasum) {
4785                 err |= ODD_CSUM_ITEM;
4786                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4787                       root->objectid, fkey->objectid, fkey->offset);
4788         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4789                    !is_hole && (ret < 0 || csum_found < search_len)) {
4790                 err |= CSUM_ITEM_MISSING;
4791                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4792                       root->objectid, fkey->objectid, fkey->offset,
4793                       csum_found, search_len);
4794         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4795                 err |= ODD_CSUM_ITEM;
4796                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4797                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4798         }
4799
4800         /* Check EXTENT_DATA hole */
4801         if (no_holes && is_hole) {
4802                 err |= FILE_EXTENT_ERROR;
4803                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4804                       root->objectid, fkey->objectid, fkey->offset);
4805         } else if (!no_holes && *end != fkey->offset) {
4806                 err |= FILE_EXTENT_ERROR;
4807                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4808                       root->objectid, fkey->objectid, fkey->offset);
4809         }
4810
4811         *end += extent_num_bytes;
4812         if (!is_hole)
4813                 *size += extent_num_bytes;
4814
4815         return err;
4816 }
4817
4818 /*
4819  * Check INODE_ITEM and related ITEMs (the same inode number)
4820  * 1. check link count
4821  * 2. check inode ref/extref
4822  * 3. check dir item/index
4823  *
4824  * @ext_ref:    the EXTENDED_IREF feature
4825  *
4826  * Return 0 if no error occurred.
4827  * Return >0 for error or hit the traversal is done(by error bitmap)
4828  */
4829 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4830                             unsigned int ext_ref)
4831 {
4832         struct extent_buffer *node;
4833         struct btrfs_inode_item *ii;
4834         struct btrfs_key key;
4835         u64 inode_id;
4836         u32 mode;
4837         u64 nlink;
4838         u64 nbytes;
4839         u64 isize;
4840         u64 size = 0;
4841         u64 refs = 0;
4842         u64 extent_end = 0;
4843         u64 extent_size = 0;
4844         unsigned int dir;
4845         unsigned int nodatasum;
4846         int slot;
4847         int ret;
4848         int err = 0;
4849
4850         node = path->nodes[0];
4851         slot = path->slots[0];
4852
4853         btrfs_item_key_to_cpu(node, &key, slot);
4854         inode_id = key.objectid;
4855
4856         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4857                 ret = btrfs_next_item(root, path);
4858                 if (ret > 0)
4859                         err |= LAST_ITEM;
4860                 return err;
4861         }
4862
4863         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4864         isize = btrfs_inode_size(node, ii);
4865         nbytes = btrfs_inode_nbytes(node, ii);
4866         mode = btrfs_inode_mode(node, ii);
4867         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4868         nlink = btrfs_inode_nlink(node, ii);
4869         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4870
4871         while (1) {
4872                 ret = btrfs_next_item(root, path);
4873                 if (ret < 0) {
4874                         /* out will fill 'err' rusing current statistics */
4875                         goto out;
4876                 } else if (ret > 0) {
4877                         err |= LAST_ITEM;
4878                         goto out;
4879                 }
4880
4881                 node = path->nodes[0];
4882                 slot = path->slots[0];
4883                 btrfs_item_key_to_cpu(node, &key, slot);
4884                 if (key.objectid != inode_id)
4885                         goto out;
4886
4887                 switch (key.type) {
4888                 case BTRFS_INODE_REF_KEY:
4889                         ret = check_inode_ref(root, &key, node, slot, &refs,
4890                                               mode);
4891                         err |= ret;
4892                         break;
4893                 case BTRFS_INODE_EXTREF_KEY:
4894                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4895                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4896                                         root->objectid, key.objectid,
4897                                         key.offset);
4898                         ret = check_inode_extref(root, &key, node, slot, &refs,
4899                                                  mode);
4900                         err |= ret;
4901                         break;
4902                 case BTRFS_DIR_ITEM_KEY:
4903                 case BTRFS_DIR_INDEX_KEY:
4904                         if (!dir) {
4905                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4906                                         root->objectid, inode_id,
4907                                         imode_to_type(mode), key.objectid,
4908                                         key.offset);
4909                         }
4910                         ret = check_dir_item(root, &key, node, slot, &size,
4911                                              ext_ref);
4912                         err |= ret;
4913                         break;
4914                 case BTRFS_EXTENT_DATA_KEY:
4915                         if (dir) {
4916                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4917                                         root->objectid, inode_id, key.objectid,
4918                                         key.offset);
4919                         }
4920                         ret = check_file_extent(root, &key, node, slot,
4921                                                 nodatasum, &extent_size,
4922                                                 &extent_end);
4923                         err |= ret;
4924                         break;
4925                 case BTRFS_XATTR_ITEM_KEY:
4926                         break;
4927                 default:
4928                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4929                               key.objectid, key.type, key.offset);
4930                 }
4931         }
4932
4933 out:
4934         /* verify INODE_ITEM nlink/isize/nbytes */
4935         if (dir) {
4936                 if (nlink != 1) {
4937                         err |= LINK_COUNT_ERROR;
4938                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4939                               root->objectid, inode_id, nlink);
4940                 }
4941
4942                 /*
4943                  * Just a warning, as dir inode nbytes is just an
4944                  * instructive value.
4945                  */
4946                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4947                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4948                                 root->objectid, inode_id, root->nodesize);
4949                 }
4950
4951                 if (isize != size) {
4952                         err |= ISIZE_ERROR;
4953                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4954                               root->objectid, inode_id, isize, size);
4955                 }
4956         } else {
4957                 if (nlink != refs) {
4958                         err |= LINK_COUNT_ERROR;
4959                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4960                               root->objectid, inode_id, nlink, refs);
4961                 } else if (!nlink) {
4962                         err |= ORPHAN_ITEM;
4963                 }
4964
4965                 if (!nbytes && !no_holes && extent_end < isize) {
4966                         err |= NBYTES_ERROR;
4967                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4968                               root->objectid, inode_id, isize);
4969                 }
4970
4971                 if (nbytes != extent_size) {
4972                         err |= NBYTES_ERROR;
4973                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4974                               root->objectid, inode_id, nbytes, extent_size);
4975                 }
4976         }
4977
4978         return err;
4979 }
4980
4981 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4982 {
4983         struct btrfs_path path;
4984         struct btrfs_key key;
4985         int err = 0;
4986         int ret;
4987
4988         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4989         key.type = BTRFS_INODE_ITEM_KEY;
4990         key.offset = 0;
4991
4992         /* For root being dropped, we don't need to check first inode */
4993         if (btrfs_root_refs(&root->root_item) == 0 &&
4994             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4995             key.objectid)
4996                 return 0;
4997
4998         btrfs_init_path(&path);
4999
5000         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5001         if (ret < 0)
5002                 goto out;
5003         if (ret > 0) {
5004                 ret = 0;
5005                 err |= INODE_ITEM_MISSING;
5006                 error("first inode item of root %llu is missing",
5007                       root->objectid);
5008         }
5009
5010         err |= check_inode_item(root, &path, ext_ref);
5011         err &= ~LAST_ITEM;
5012         if (err && !ret)
5013                 ret = -EIO;
5014 out:
5015         btrfs_release_path(&path);
5016         return ret;
5017 }
5018
5019 /*
5020  * Iterate all item on the tree and call check_inode_item() to check.
5021  *
5022  * @root:       the root of the tree to be checked.
5023  * @ext_ref:    the EXTENDED_IREF feature
5024  *
5025  * Return 0 if no error found.
5026  * Return <0 for error.
5027  */
5028 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5029 {
5030         struct btrfs_path path;
5031         struct node_refs nrefs;
5032         struct btrfs_root_item *root_item = &root->root_item;
5033         int ret;
5034         int level;
5035         int err = 0;
5036
5037         /*
5038          * We need to manually check the first inode item(256)
5039          * As the following traversal function will only start from
5040          * the first inode item in the leaf, if inode item(256) is missing
5041          * we will just skip it forever.
5042          */
5043         ret = check_fs_first_inode(root, ext_ref);
5044         if (ret < 0)
5045                 return ret;
5046
5047         memset(&nrefs, 0, sizeof(nrefs));
5048         level = btrfs_header_level(root->node);
5049         btrfs_init_path(&path);
5050
5051         if (btrfs_root_refs(root_item) > 0 ||
5052             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5053                 path.nodes[level] = root->node;
5054                 path.slots[level] = 0;
5055                 extent_buffer_get(root->node);
5056         } else {
5057                 struct btrfs_key key;
5058
5059                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5060                 level = root_item->drop_level;
5061                 path.lowest_level = level;
5062                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5063                 if (ret < 0)
5064                         goto out;
5065                 ret = 0;
5066         }
5067
5068         while (1) {
5069                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5070                 err |= !!ret;
5071
5072                 /* if ret is negative, walk shall stop */
5073                 if (ret < 0) {
5074                         ret = err;
5075                         break;
5076                 }
5077
5078                 ret = walk_up_tree_v2(root, &path, &level);
5079                 if (ret != 0) {
5080                         /* Normal exit, reset ret to err */
5081                         ret = err;
5082                         break;
5083                 }
5084         }
5085
5086 out:
5087         btrfs_release_path(&path);
5088         return ret;
5089 }
5090
5091 /*
5092  * Find the relative ref for root_ref and root_backref.
5093  *
5094  * @root:       the root of the root tree.
5095  * @ref_key:    the key of the root ref.
5096  *
5097  * Return 0 if no error occurred.
5098  */
5099 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5100                           struct extent_buffer *node, int slot)
5101 {
5102         struct btrfs_path path;
5103         struct btrfs_key key;
5104         struct btrfs_root_ref *ref;
5105         struct btrfs_root_ref *backref;
5106         char ref_name[BTRFS_NAME_LEN] = {0};
5107         char backref_name[BTRFS_NAME_LEN] = {0};
5108         u64 ref_dirid;
5109         u64 ref_seq;
5110         u32 ref_namelen;
5111         u64 backref_dirid;
5112         u64 backref_seq;
5113         u32 backref_namelen;
5114         u32 len;
5115         int ret;
5116         int err = 0;
5117
5118         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5119         ref_dirid = btrfs_root_ref_dirid(node, ref);
5120         ref_seq = btrfs_root_ref_sequence(node, ref);
5121         ref_namelen = btrfs_root_ref_name_len(node, ref);
5122
5123         if (ref_namelen <= BTRFS_NAME_LEN) {
5124                 len = ref_namelen;
5125         } else {
5126                 len = BTRFS_NAME_LEN;
5127                 warning("%s[%llu %llu] ref_name too long",
5128                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5129                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5130                         ref_key->offset);
5131         }
5132         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5133
5134         /* Find relative root_ref */
5135         key.objectid = ref_key->offset;
5136         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5137         key.offset = ref_key->objectid;
5138
5139         btrfs_init_path(&path);
5140         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5141         if (ret) {
5142                 err |= ROOT_REF_MISSING;
5143                 error("%s[%llu %llu] couldn't find relative ref",
5144                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5145                       "ROOT_REF" : "ROOT_BACKREF",
5146                       ref_key->objectid, ref_key->offset);
5147                 goto out;
5148         }
5149
5150         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5151                                  struct btrfs_root_ref);
5152         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5153         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5154         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5155
5156         if (backref_namelen <= BTRFS_NAME_LEN) {
5157                 len = backref_namelen;
5158         } else {
5159                 len = BTRFS_NAME_LEN;
5160                 warning("%s[%llu %llu] ref_name too long",
5161                         key.type == BTRFS_ROOT_REF_KEY ?
5162                         "ROOT_REF" : "ROOT_BACKREF",
5163                         key.objectid, key.offset);
5164         }
5165         read_extent_buffer(path.nodes[0], backref_name,
5166                            (unsigned long)(backref + 1), len);
5167
5168         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5169             ref_namelen != backref_namelen ||
5170             strncmp(ref_name, backref_name, len)) {
5171                 err |= ROOT_REF_MISMATCH;
5172                 error("%s[%llu %llu] mismatch relative ref",
5173                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5174                       "ROOT_REF" : "ROOT_BACKREF",
5175                       ref_key->objectid, ref_key->offset);
5176         }
5177 out:
5178         btrfs_release_path(&path);
5179         return err;
5180 }
5181
5182 /*
5183  * Check all fs/file tree in low_memory mode.
5184  *
5185  * 1. for fs tree root item, call check_fs_root_v2()
5186  * 2. for fs tree root ref/backref, call check_root_ref()
5187  *
5188  * Return 0 if no error occurred.
5189  */
5190 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5191 {
5192         struct btrfs_root *tree_root = fs_info->tree_root;
5193         struct btrfs_root *cur_root = NULL;
5194         struct btrfs_path path;
5195         struct btrfs_key key;
5196         struct extent_buffer *node;
5197         unsigned int ext_ref;
5198         int slot;
5199         int ret;
5200         int err = 0;
5201
5202         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5203
5204         btrfs_init_path(&path);
5205         key.objectid = BTRFS_FS_TREE_OBJECTID;
5206         key.offset = 0;
5207         key.type = BTRFS_ROOT_ITEM_KEY;
5208
5209         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5210         if (ret < 0) {
5211                 err = ret;
5212                 goto out;
5213         } else if (ret > 0) {
5214                 err = -ENOENT;
5215                 goto out;
5216         }
5217
5218         while (1) {
5219                 node = path.nodes[0];
5220                 slot = path.slots[0];
5221                 btrfs_item_key_to_cpu(node, &key, slot);
5222                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5223                         goto out;
5224                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5225                     fs_root_objectid(key.objectid)) {
5226                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5227                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5228                                                                        &key);
5229                         } else {
5230                                 key.offset = (u64)-1;
5231                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5232                         }
5233
5234                         if (IS_ERR(cur_root)) {
5235                                 error("Fail to read fs/subvol tree: %lld",
5236                                       key.objectid);
5237                                 err = -EIO;
5238                                 goto next;
5239                         }
5240
5241                         ret = check_fs_root_v2(cur_root, ext_ref);
5242                         err |= ret;
5243
5244                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5245                                 btrfs_free_fs_root(cur_root);
5246                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5247                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5248                         ret = check_root_ref(tree_root, &key, node, slot);
5249                         err |= ret;
5250                 }
5251 next:
5252                 ret = btrfs_next_item(tree_root, &path);
5253                 if (ret > 0)
5254                         goto out;
5255                 if (ret < 0) {
5256                         err = ret;
5257                         goto out;
5258                 }
5259         }
5260
5261 out:
5262         btrfs_release_path(&path);
5263         return err;
5264 }
5265
5266 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5267 {
5268         struct list_head *cur = rec->backrefs.next;
5269         struct extent_backref *back;
5270         struct tree_backref *tback;
5271         struct data_backref *dback;
5272         u64 found = 0;
5273         int err = 0;
5274
5275         while(cur != &rec->backrefs) {
5276                 back = to_extent_backref(cur);
5277                 cur = cur->next;
5278                 if (!back->found_extent_tree) {
5279                         err = 1;
5280                         if (!print_errs)
5281                                 goto out;
5282                         if (back->is_data) {
5283                                 dback = to_data_backref(back);
5284                                 fprintf(stderr, "Backref %llu %s %llu"
5285                                         " owner %llu offset %llu num_refs %lu"
5286                                         " not found in extent tree\n",
5287                                         (unsigned long long)rec->start,
5288                                         back->full_backref ?
5289                                         "parent" : "root",
5290                                         back->full_backref ?
5291                                         (unsigned long long)dback->parent:
5292                                         (unsigned long long)dback->root,
5293                                         (unsigned long long)dback->owner,
5294                                         (unsigned long long)dback->offset,
5295                                         (unsigned long)dback->num_refs);
5296                         } else {
5297                                 tback = to_tree_backref(back);
5298                                 fprintf(stderr, "Backref %llu parent %llu"
5299                                         " root %llu not found in extent tree\n",
5300                                         (unsigned long long)rec->start,
5301                                         (unsigned long long)tback->parent,
5302                                         (unsigned long long)tback->root);
5303                         }
5304                 }
5305                 if (!back->is_data && !back->found_ref) {
5306                         err = 1;
5307                         if (!print_errs)
5308                                 goto out;
5309                         tback = to_tree_backref(back);
5310                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5311                                 (unsigned long long)rec->start,
5312                                 back->full_backref ? "parent" : "root",
5313                                 back->full_backref ?
5314                                 (unsigned long long)tback->parent :
5315                                 (unsigned long long)tback->root, back);
5316                 }
5317                 if (back->is_data) {
5318                         dback = to_data_backref(back);
5319                         if (dback->found_ref != dback->num_refs) {
5320                                 err = 1;
5321                                 if (!print_errs)
5322                                         goto out;
5323                                 fprintf(stderr, "Incorrect local backref count"
5324                                         " on %llu %s %llu owner %llu"
5325                                         " offset %llu found %u wanted %u back %p\n",
5326                                         (unsigned long long)rec->start,
5327                                         back->full_backref ?
5328                                         "parent" : "root",
5329                                         back->full_backref ?
5330                                         (unsigned long long)dback->parent:
5331                                         (unsigned long long)dback->root,
5332                                         (unsigned long long)dback->owner,
5333                                         (unsigned long long)dback->offset,
5334                                         dback->found_ref, dback->num_refs, back);
5335                         }
5336                         if (dback->disk_bytenr != rec->start) {
5337                                 err = 1;
5338                                 if (!print_errs)
5339                                         goto out;
5340                                 fprintf(stderr, "Backref disk bytenr does not"
5341                                         " match extent record, bytenr=%llu, "
5342                                         "ref bytenr=%llu\n",
5343                                         (unsigned long long)rec->start,
5344                                         (unsigned long long)dback->disk_bytenr);
5345                         }
5346
5347                         if (dback->bytes != rec->nr) {
5348                                 err = 1;
5349                                 if (!print_errs)
5350                                         goto out;
5351                                 fprintf(stderr, "Backref bytes do not match "
5352                                         "extent backref, bytenr=%llu, ref "
5353                                         "bytes=%llu, backref bytes=%llu\n",
5354                                         (unsigned long long)rec->start,
5355                                         (unsigned long long)rec->nr,
5356                                         (unsigned long long)dback->bytes);
5357                         }
5358                 }
5359                 if (!back->is_data) {
5360                         found += 1;
5361                 } else {
5362                         dback = to_data_backref(back);
5363                         found += dback->found_ref;
5364                 }
5365         }
5366         if (found != rec->refs) {
5367                 err = 1;
5368                 if (!print_errs)
5369                         goto out;
5370                 fprintf(stderr, "Incorrect global backref count "
5371                         "on %llu found %llu wanted %llu\n",
5372                         (unsigned long long)rec->start,
5373                         (unsigned long long)found,
5374                         (unsigned long long)rec->refs);
5375         }
5376 out:
5377         return err;
5378 }
5379
5380 static int free_all_extent_backrefs(struct extent_record *rec)
5381 {
5382         struct extent_backref *back;
5383         struct list_head *cur;
5384         while (!list_empty(&rec->backrefs)) {
5385                 cur = rec->backrefs.next;
5386                 back = to_extent_backref(cur);
5387                 list_del(cur);
5388                 free(back);
5389         }
5390         return 0;
5391 }
5392
5393 static void free_extent_record_cache(struct cache_tree *extent_cache)
5394 {
5395         struct cache_extent *cache;
5396         struct extent_record *rec;
5397
5398         while (1) {
5399                 cache = first_cache_extent(extent_cache);
5400                 if (!cache)
5401                         break;
5402                 rec = container_of(cache, struct extent_record, cache);
5403                 remove_cache_extent(extent_cache, cache);
5404                 free_all_extent_backrefs(rec);
5405                 free(rec);
5406         }
5407 }
5408
5409 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5410                                  struct extent_record *rec)
5411 {
5412         if (rec->content_checked && rec->owner_ref_checked &&
5413             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5414             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5415             !rec->bad_full_backref && !rec->crossing_stripes &&
5416             !rec->wrong_chunk_type) {
5417                 remove_cache_extent(extent_cache, &rec->cache);
5418                 free_all_extent_backrefs(rec);
5419                 list_del_init(&rec->list);
5420                 free(rec);
5421         }
5422         return 0;
5423 }
5424
5425 static int check_owner_ref(struct btrfs_root *root,
5426                             struct extent_record *rec,
5427                             struct extent_buffer *buf)
5428 {
5429         struct extent_backref *node;
5430         struct tree_backref *back;
5431         struct btrfs_root *ref_root;
5432         struct btrfs_key key;
5433         struct btrfs_path path;
5434         struct extent_buffer *parent;
5435         int level;
5436         int found = 0;
5437         int ret;
5438
5439         list_for_each_entry(node, &rec->backrefs, list) {
5440                 if (node->is_data)
5441                         continue;
5442                 if (!node->found_ref)
5443                         continue;
5444                 if (node->full_backref)
5445                         continue;
5446                 back = to_tree_backref(node);
5447                 if (btrfs_header_owner(buf) == back->root)
5448                         return 0;
5449         }
5450         BUG_ON(rec->is_root);
5451
5452         /* try to find the block by search corresponding fs tree */
5453         key.objectid = btrfs_header_owner(buf);
5454         key.type = BTRFS_ROOT_ITEM_KEY;
5455         key.offset = (u64)-1;
5456
5457         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5458         if (IS_ERR(ref_root))
5459                 return 1;
5460
5461         level = btrfs_header_level(buf);
5462         if (level == 0)
5463                 btrfs_item_key_to_cpu(buf, &key, 0);
5464         else
5465                 btrfs_node_key_to_cpu(buf, &key, 0);
5466
5467         btrfs_init_path(&path);
5468         path.lowest_level = level + 1;
5469         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5470         if (ret < 0)
5471                 return 0;
5472
5473         parent = path.nodes[level + 1];
5474         if (parent && buf->start == btrfs_node_blockptr(parent,
5475                                                         path.slots[level + 1]))
5476                 found = 1;
5477
5478         btrfs_release_path(&path);
5479         return found ? 0 : 1;
5480 }
5481
5482 static int is_extent_tree_record(struct extent_record *rec)
5483 {
5484         struct list_head *cur = rec->backrefs.next;
5485         struct extent_backref *node;
5486         struct tree_backref *back;
5487         int is_extent = 0;
5488
5489         while(cur != &rec->backrefs) {
5490                 node = to_extent_backref(cur);
5491                 cur = cur->next;
5492                 if (node->is_data)
5493                         return 0;
5494                 back = to_tree_backref(node);
5495                 if (node->full_backref)
5496                         return 0;
5497                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5498                         is_extent = 1;
5499         }
5500         return is_extent;
5501 }
5502
5503
5504 static int record_bad_block_io(struct btrfs_fs_info *info,
5505                                struct cache_tree *extent_cache,
5506                                u64 start, u64 len)
5507 {
5508         struct extent_record *rec;
5509         struct cache_extent *cache;
5510         struct btrfs_key key;
5511
5512         cache = lookup_cache_extent(extent_cache, start, len);
5513         if (!cache)
5514                 return 0;
5515
5516         rec = container_of(cache, struct extent_record, cache);
5517         if (!is_extent_tree_record(rec))
5518                 return 0;
5519
5520         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5521         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5522 }
5523
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * swapped.  For a leaf, the item data, item offsets, item sizes and keys of
 * the two items are exchanged.
 *
 * Returns 0 on success or -ENOMEM if a temporary item buffer cannot be
 * allocated (leaf case only).
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crosswise. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        /*
                         * The first key of this node changed, so pass the new
                         * one to btrfs_fixup_low_keys() for the level above.
                         */
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                /* Temporary copies of both items' data. */
                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                /*
                 * NOTE(review): fix_item_offset() adds btrfs_leaf_data(buf)
                 * to an item offset before touching the buffer; the raw item
                 * offsets are used here.  Confirm which is intended.
                 */
                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each write below uses the *other* item's
                 * size.  item1_data holds item1_size bytes but item2_size
                 * bytes are copied from it (and vice versa), so when the two
                 * sizes differ this reads past the end of the smaller
                 * temporary buffer.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Exchange the offset and size fields of the two item headers. */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /*
                 * Exchange the keys.  btrfs_set_item_key_unsafe() is handed
                 * only the path, so path->slots[0] is pointed at each slot in
                 * turn; it is left at @slot + 1 on return.
                 */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5591
5592 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5593 {
5594         struct extent_buffer *buf;
5595         struct btrfs_key k1, k2;
5596         int i;
5597         int level = path->lowest_level;
5598         int ret = -EIO;
5599
5600         buf = path->nodes[level];
5601         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5602                 if (level) {
5603                         btrfs_node_key_to_cpu(buf, &k1, i);
5604                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5605                 } else {
5606                         btrfs_item_key_to_cpu(buf, &k1, i);
5607                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5608                 }
5609                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5610                         continue;
5611                 ret = swap_values(root, path, buf, i);
5612                 if (ret)
5613                         break;
5614                 btrfs_mark_buffer_dirty(buf);
5615                 i = 0;
5616         }
5617         return ret;
5618 }
5619
5620 static int delete_bogus_item(struct btrfs_root *root,
5621                              struct btrfs_path *path,
5622                              struct extent_buffer *buf, int slot)
5623 {
5624         struct btrfs_key key;
5625         int nritems = btrfs_header_nritems(buf);
5626
5627         btrfs_item_key_to_cpu(buf, &key, slot);
5628
5629         /* These are all the keys we can deal with missing. */
5630         if (key.type != BTRFS_DIR_INDEX_KEY &&
5631             key.type != BTRFS_EXTENT_ITEM_KEY &&
5632             key.type != BTRFS_METADATA_ITEM_KEY &&
5633             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5634             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5635                 return -1;
5636
5637         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5638                (unsigned long long)key.objectid, key.type,
5639                (unsigned long long)key.offset, slot, buf->start);
5640         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5641                               btrfs_item_nr_offset(slot + 1),
5642                               sizeof(struct btrfs_item) *
5643                               (nritems - slot - 1));
5644         btrfs_set_header_nritems(buf, nritems - 1);
5645         if (slot == 0) {
5646                 struct btrfs_disk_key disk_key;
5647
5648                 btrfs_item_key(buf, &disk_key, 0);
5649                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5650         }
5651         btrfs_mark_buffer_dirty(buf);
5652         return 0;
5653 }
5654
5655 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5656 {
5657         struct extent_buffer *buf;
5658         int i;
5659         int ret = 0;
5660
5661         /* We should only get this for leaves */
5662         BUG_ON(path->lowest_level);
5663         buf = path->nodes[0];
5664 again:
5665         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5666                 unsigned int shift = 0, offset;
5667
5668                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5669                     BTRFS_LEAF_DATA_SIZE(root)) {
5670                         if (btrfs_item_end_nr(buf, i) >
5671                             BTRFS_LEAF_DATA_SIZE(root)) {
5672                                 ret = delete_bogus_item(root, path, buf, i);
5673                                 if (!ret)
5674                                         goto again;
5675                                 fprintf(stderr, "item is off the end of the "
5676                                         "leaf, can't fix\n");
5677                                 ret = -EIO;
5678                                 break;
5679                         }
5680                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5681                                 btrfs_item_end_nr(buf, i);
5682                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5683                            btrfs_item_offset_nr(buf, i - 1)) {
5684                         if (btrfs_item_end_nr(buf, i) >
5685                             btrfs_item_offset_nr(buf, i - 1)) {
5686                                 ret = delete_bogus_item(root, path, buf, i);
5687                                 if (!ret)
5688                                         goto again;
5689                                 fprintf(stderr, "items overlap, can't fix\n");
5690                                 ret = -EIO;
5691                                 break;
5692                         }
5693                         shift = btrfs_item_offset_nr(buf, i - 1) -
5694                                 btrfs_item_end_nr(buf, i);
5695                 }
5696                 if (!shift)
5697                         continue;
5698
5699                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5700                        i, shift, (unsigned long long)buf->start);
5701                 offset = btrfs_item_offset_nr(buf, i);
5702                 memmove_extent_buffer(buf,
5703                                       btrfs_leaf_data(buf) + offset + shift,
5704                                       btrfs_leaf_data(buf) + offset,
5705                                       btrfs_item_size_nr(buf, i));
5706                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5707                                       offset + shift);
5708                 btrfs_mark_buffer_dirty(buf);
5709         }
5710
5711         /*
5712          * We may have moved things, in which case we want to exit so we don't
5713          * write those changes out.  Once we have proper abort functionality in
5714          * progs this can be changed to something nicer.
5715          */
5716         BUG_ON(ret);
5717         return ret;
5718 }
5719
5720 /*
5721  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5722  * then just return -EIO.
5723  */
5724 static int try_to_fix_bad_block(struct btrfs_root *root,
5725                                 struct extent_buffer *buf,
5726                                 enum btrfs_tree_block_status status)
5727 {
5728         struct btrfs_trans_handle *trans;
5729         struct ulist *roots;
5730         struct ulist_node *node;
5731         struct btrfs_root *search_root;
5732         struct btrfs_path path;
5733         struct ulist_iterator iter;
5734         struct btrfs_key root_key, key;
5735         int ret;
5736
5737         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5738             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5739                 return -EIO;
5740
5741         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5742         if (ret)
5743                 return -EIO;
5744
5745         btrfs_init_path(&path);
5746         ULIST_ITER_INIT(&iter);
5747         while ((node = ulist_next(roots, &iter))) {
5748                 root_key.objectid = node->val;
5749                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5750                 root_key.offset = (u64)-1;
5751
5752                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5753                 if (IS_ERR(root)) {
5754                         ret = -EIO;
5755                         break;
5756                 }
5757
5758
5759                 trans = btrfs_start_transaction(search_root, 0);
5760                 if (IS_ERR(trans)) {
5761                         ret = PTR_ERR(trans);
5762                         break;
5763                 }
5764
5765                 path.lowest_level = btrfs_header_level(buf);
5766                 path.skip_check_block = 1;
5767                 if (path.lowest_level)
5768                         btrfs_node_key_to_cpu(buf, &key, 0);
5769                 else
5770                         btrfs_item_key_to_cpu(buf, &key, 0);
5771                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5772                 if (ret) {
5773                         ret = -EIO;
5774                         btrfs_commit_transaction(trans, search_root);
5775                         break;
5776                 }
5777                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5778                         ret = fix_key_order(search_root, &path);
5779                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780                         ret = fix_item_offset(search_root, &path);
5781                 if (ret) {
5782                         btrfs_commit_transaction(trans, search_root);
5783                         break;
5784                 }
5785                 btrfs_release_path(&path);
5786                 btrfs_commit_transaction(trans, search_root);
5787         }
5788         ulist_free(roots);
5789         btrfs_release_path(&path);
5790         return ret;
5791 }
5792
5793 static int check_block(struct btrfs_root *root,
5794                        struct cache_tree *extent_cache,
5795                        struct extent_buffer *buf, u64 flags)
5796 {
5797         struct extent_record *rec;
5798         struct cache_extent *cache;
5799         struct btrfs_key key;
5800         enum btrfs_tree_block_status status;
5801         int ret = 0;
5802         int level;
5803
5804         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5805         if (!cache)
5806                 return 1;
5807         rec = container_of(cache, struct extent_record, cache);
5808         rec->generation = btrfs_header_generation(buf);
5809
5810         level = btrfs_header_level(buf);
5811         if (btrfs_header_nritems(buf) > 0) {
5812
5813                 if (level == 0)
5814                         btrfs_item_key_to_cpu(buf, &key, 0);
5815                 else
5816                         btrfs_node_key_to_cpu(buf, &key, 0);
5817
5818                 rec->info_objectid = key.objectid;
5819         }
5820         rec->info_level = level;
5821
5822         if (btrfs_is_leaf(buf))
5823                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5824         else
5825                 status = btrfs_check_node(root, &rec->parent_key, buf);
5826
5827         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5828                 if (repair)
5829                         status = try_to_fix_bad_block(root, buf, status);
5830                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5831                         ret = -EIO;
5832                         fprintf(stderr, "bad block %llu\n",
5833                                 (unsigned long long)buf->start);
5834                 } else {
5835                         /*
5836                          * Signal to callers we need to start the scan over
5837                          * again since we'll have cowed blocks.
5838                          */
5839                         ret = -EAGAIN;
5840                 }
5841         } else {
5842                 rec->content_checked = 1;
5843                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5844                         rec->owner_ref_checked = 1;
5845                 else {
5846                         ret = check_owner_ref(root, rec, buf);
5847                         if (!ret)
5848                                 rec->owner_ref_checked = 1;
5849                 }
5850         }
5851         if (!ret)
5852                 maybe_free_extent_rec(extent_cache, rec);
5853         return ret;
5854 }
5855
5856 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5857                                                 u64 parent, u64 root)
5858 {
5859         struct list_head *cur = rec->backrefs.next;
5860         struct extent_backref *node;
5861         struct tree_backref *back;
5862
5863         while(cur != &rec->backrefs) {
5864                 node = to_extent_backref(cur);
5865                 cur = cur->next;
5866                 if (node->is_data)
5867                         continue;
5868                 back = to_tree_backref(node);
5869                 if (parent > 0) {
5870                         if (!node->full_backref)
5871                                 continue;
5872                         if (parent == back->parent)
5873                                 return back;
5874                 } else {
5875                         if (node->full_backref)
5876                                 continue;
5877                         if (back->root == root)
5878                                 return back;
5879                 }
5880         }
5881         return NULL;
5882 }
5883
5884 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5885                                                 u64 parent, u64 root)
5886 {
5887         struct tree_backref *ref = malloc(sizeof(*ref));
5888
5889         if (!ref)
5890                 return NULL;
5891         memset(&ref->node, 0, sizeof(ref->node));
5892         if (parent > 0) {
5893                 ref->parent = parent;
5894                 ref->node.full_backref = 1;
5895         } else {
5896                 ref->root = root;
5897                 ref->node.full_backref = 0;
5898         }
5899         list_add_tail(&ref->node.list, &rec->backrefs);
5900
5901         return ref;
5902 }
5903
5904 static struct data_backref *find_data_backref(struct extent_record *rec,
5905                                                 u64 parent, u64 root,
5906                                                 u64 owner, u64 offset,
5907                                                 int found_ref,
5908                                                 u64 disk_bytenr, u64 bytes)
5909 {
5910         struct list_head *cur = rec->backrefs.next;
5911         struct extent_backref *node;
5912         struct data_backref *back;
5913
5914         while(cur != &rec->backrefs) {
5915                 node = to_extent_backref(cur);
5916                 cur = cur->next;
5917                 if (!node->is_data)
5918                         continue;
5919                 back = to_data_backref(node);
5920                 if (parent > 0) {
5921                         if (!node->full_backref)
5922                                 continue;
5923                         if (parent == back->parent)
5924                                 return back;
5925                 } else {
5926                         if (node->full_backref)
5927                                 continue;
5928                         if (back->root == root && back->owner == owner &&
5929                             back->offset == offset) {
5930                                 if (found_ref && node->found_ref &&
5931                                     (back->bytes != bytes ||
5932                                     back->disk_bytenr != disk_bytenr))
5933                                         continue;
5934                                 return back;
5935                         }
5936                 }
5937         }
5938         return NULL;
5939 }
5940
5941 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5942                                                 u64 parent, u64 root,
5943                                                 u64 owner, u64 offset,
5944                                                 u64 max_size)
5945 {
5946         struct data_backref *ref = malloc(sizeof(*ref));
5947
5948         if (!ref)
5949                 return NULL;
5950         memset(&ref->node, 0, sizeof(ref->node));
5951         ref->node.is_data = 1;
5952
5953         if (parent > 0) {
5954                 ref->parent = parent;
5955                 ref->owner = 0;
5956                 ref->offset = 0;
5957                 ref->node.full_backref = 1;
5958         } else {
5959                 ref->root = root;
5960                 ref->owner = owner;
5961                 ref->offset = offset;
5962                 ref->node.full_backref = 0;
5963         }
5964         ref->bytes = max_size;
5965         ref->found_ref = 0;
5966         ref->num_refs = 0;
5967         list_add_tail(&ref->node.list, &rec->backrefs);
5968         if (max_size > rec->max_size)
5969                 rec->max_size = max_size;
5970         return ref;
5971 }
5972
5973 /* Check if the type of extent matches with its chunk */
5974 static void check_extent_type(struct extent_record *rec)
5975 {
5976         struct btrfs_block_group_cache *bg_cache;
5977
5978         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5979         if (!bg_cache)
5980                 return;
5981
5982         /* data extent, check chunk directly*/
5983         if (!rec->metadata) {
5984                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5985                         rec->wrong_chunk_type = 1;
5986                 return;
5987         }
5988
5989         /* metadata extent, check the obvious case first */
5990         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5991                                  BTRFS_BLOCK_GROUP_METADATA))) {
5992                 rec->wrong_chunk_type = 1;
5993                 return;
5994         }
5995
5996         /*
5997          * Check SYSTEM extent, as it's also marked as metadata, we can only
5998          * make sure it's a SYSTEM extent by its backref
5999          */
6000         if (!list_empty(&rec->backrefs)) {
6001                 struct extent_backref *node;
6002                 struct tree_backref *tback;
6003                 u64 bg_type;
6004
6005                 node = to_extent_backref(rec->backrefs.next);
6006                 if (node->is_data) {
6007                         /* tree block shouldn't have data backref */
6008                         rec->wrong_chunk_type = 1;
6009                         return;
6010                 }
6011                 tback = container_of(node, struct tree_backref, node);
6012
6013                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6014                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6015                 else
6016                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6017                 if (!(bg_cache->flags & bg_type))
6018                         rec->wrong_chunk_type = 1;
6019         }
6020 }
6021
6022 /*
6023  * Allocate a new extent record, fill default values from @tmpl and insert int
6024  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6025  * the cache, otherwise it fails.
6026  */
6027 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6028                 struct extent_record *tmpl)
6029 {
6030         struct extent_record *rec;
6031         int ret = 0;
6032
6033         rec = malloc(sizeof(*rec));
6034         if (!rec)
6035                 return -ENOMEM;
6036         rec->start = tmpl->start;
6037         rec->max_size = tmpl->max_size;
6038         rec->nr = max(tmpl->nr, tmpl->max_size);
6039         rec->found_rec = tmpl->found_rec;
6040         rec->content_checked = tmpl->content_checked;
6041         rec->owner_ref_checked = tmpl->owner_ref_checked;
6042         rec->num_duplicates = 0;
6043         rec->metadata = tmpl->metadata;
6044         rec->flag_block_full_backref = FLAG_UNSET;
6045         rec->bad_full_backref = 0;
6046         rec->crossing_stripes = 0;
6047         rec->wrong_chunk_type = 0;
6048         rec->is_root = tmpl->is_root;
6049         rec->refs = tmpl->refs;
6050         rec->extent_item_refs = tmpl->extent_item_refs;
6051         rec->parent_generation = tmpl->parent_generation;
6052         INIT_LIST_HEAD(&rec->backrefs);
6053         INIT_LIST_HEAD(&rec->dups);
6054         INIT_LIST_HEAD(&rec->list);
6055         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6056         rec->cache.start = tmpl->start;
6057         rec->cache.size = tmpl->nr;
6058         ret = insert_cache_extent(extent_cache, &rec->cache);
6059         if (ret) {
6060                 free(rec);
6061                 return ret;
6062         }
6063         bytes_used += rec->nr;
6064
6065         if (tmpl->metadata)
6066                 rec->crossing_stripes = check_crossing_stripes(global_info,
6067                                 rec->start, global_info->tree_root->nodesize);
6068         check_extent_type(rec);
6069         return ret;
6070 }
6071
6072 /*
6073  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6074  * some are hints:
6075  * - refs              - if found, increase refs
6076  * - is_root           - if found, set
6077  * - content_checked   - if found, set
6078  * - owner_ref_checked - if found, set
6079  *
6080  * If not found, create a new one, initialize and insert.
6081  */
6082 static int add_extent_rec(struct cache_tree *extent_cache,
6083                 struct extent_record *tmpl)
6084 {
6085         struct extent_record *rec;
6086         struct cache_extent *cache;
6087         int ret = 0;
6088         int dup = 0;
6089
6090         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6091         if (cache) {
6092                 rec = container_of(cache, struct extent_record, cache);
6093                 if (tmpl->refs)
6094                         rec->refs++;
6095                 if (rec->nr == 1)
6096                         rec->nr = max(tmpl->nr, tmpl->max_size);
6097
6098                 /*
6099                  * We need to make sure to reset nr to whatever the extent
6100                  * record says was the real size, this way we can compare it to
6101                  * the backrefs.
6102                  */
6103                 if (tmpl->found_rec) {
6104                         if (tmpl->start != rec->start || rec->found_rec) {
6105                                 struct extent_record *tmp;
6106
6107                                 dup = 1;
6108                                 if (list_empty(&rec->list))
6109                                         list_add_tail(&rec->list,
6110                                                       &duplicate_extents);
6111
6112                                 /*
6113                                  * We have to do this song and dance in case we
6114                                  * find an extent record that falls inside of
6115                                  * our current extent record but does not have
6116                                  * the same objectid.
6117                                  */
6118                                 tmp = malloc(sizeof(*tmp));
6119                                 if (!tmp)
6120                                         return -ENOMEM;
6121                                 tmp->start = tmpl->start;
6122                                 tmp->max_size = tmpl->max_size;
6123                                 tmp->nr = tmpl->nr;
6124                                 tmp->found_rec = 1;
6125                                 tmp->metadata = tmpl->metadata;
6126                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6127                                 INIT_LIST_HEAD(&tmp->list);
6128                                 list_add_tail(&tmp->list, &rec->dups);
6129                                 rec->num_duplicates++;
6130                         } else {
6131                                 rec->nr = tmpl->nr;
6132                                 rec->found_rec = 1;
6133                         }
6134                 }
6135
6136                 if (tmpl->extent_item_refs && !dup) {
6137                         if (rec->extent_item_refs) {
6138                                 fprintf(stderr, "block %llu rec "
6139                                         "extent_item_refs %llu, passed %llu\n",
6140                                         (unsigned long long)tmpl->start,
6141                                         (unsigned long long)
6142                                                         rec->extent_item_refs,
6143                                         (unsigned long long)tmpl->extent_item_refs);
6144                         }
6145                         rec->extent_item_refs = tmpl->extent_item_refs;
6146                 }
6147                 if (tmpl->is_root)
6148                         rec->is_root = 1;
6149                 if (tmpl->content_checked)
6150                         rec->content_checked = 1;
6151                 if (tmpl->owner_ref_checked)
6152                         rec->owner_ref_checked = 1;
6153                 memcpy(&rec->parent_key, &tmpl->parent_key,
6154                                 sizeof(tmpl->parent_key));
6155                 if (tmpl->parent_generation)
6156                         rec->parent_generation = tmpl->parent_generation;
6157                 if (rec->max_size < tmpl->max_size)
6158                         rec->max_size = tmpl->max_size;
6159
6160                 /*
6161                  * A metadata extent can't cross stripe_len boundary, otherwise
6162                  * kernel scrub won't be able to handle it.
6163                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6164                  * it.
6165                  */
6166                 if (tmpl->metadata)
6167                         rec->crossing_stripes = check_crossing_stripes(
6168                                         global_info, rec->start,
6169                                         global_info->tree_root->nodesize);
6170                 check_extent_type(rec);
6171                 maybe_free_extent_rec(extent_cache, rec);
6172                 return ret;
6173         }
6174
6175         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6176
6177         return ret;
6178 }
6179
6180 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6181                             u64 parent, u64 root, int found_ref)
6182 {
6183         struct extent_record *rec;
6184         struct tree_backref *back;
6185         struct cache_extent *cache;
6186         int ret;
6187
6188         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6189         if (!cache) {
6190                 struct extent_record tmpl;
6191
6192                 memset(&tmpl, 0, sizeof(tmpl));
6193                 tmpl.start = bytenr;
6194                 tmpl.nr = 1;
6195                 tmpl.metadata = 1;
6196
6197                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6198                 if (ret)
6199                         return ret;
6200
6201                 /* really a bug in cache_extent implement now */
6202                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6203                 if (!cache)
6204                         return -ENOENT;
6205         }
6206
6207         rec = container_of(cache, struct extent_record, cache);
6208         if (rec->start != bytenr) {
6209                 /*
6210                  * Several cause, from unaligned bytenr to over lapping extents
6211                  */
6212                 return -EEXIST;
6213         }
6214
6215         back = find_tree_backref(rec, parent, root);
6216         if (!back) {
6217                 back = alloc_tree_backref(rec, parent, root);
6218                 if (!back)
6219                         return -ENOMEM;
6220         }
6221
6222         if (found_ref) {
6223                 if (back->node.found_ref) {
6224                         fprintf(stderr, "Extent back ref already exists "
6225                                 "for %llu parent %llu root %llu \n",
6226                                 (unsigned long long)bytenr,
6227                                 (unsigned long long)parent,
6228                                 (unsigned long long)root);
6229                 }
6230                 back->node.found_ref = 1;
6231         } else {
6232                 if (back->node.found_extent_tree) {
6233                         fprintf(stderr, "Extent back ref already exists "
6234                                 "for %llu parent %llu root %llu \n",
6235                                 (unsigned long long)bytenr,
6236                                 (unsigned long long)parent,
6237                                 (unsigned long long)root);
6238                 }
6239                 back->node.found_extent_tree = 1;
6240         }
6241         check_extent_type(rec);
6242         maybe_free_extent_rec(extent_cache, rec);
6243         return 0;
6244 }
6245
6246 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6247                             u64 parent, u64 root, u64 owner, u64 offset,
6248                             u32 num_refs, int found_ref, u64 max_size)
6249 {
6250         struct extent_record *rec;
6251         struct data_backref *back;
6252         struct cache_extent *cache;
6253         int ret;
6254
6255         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6256         if (!cache) {
6257                 struct extent_record tmpl;
6258
6259                 memset(&tmpl, 0, sizeof(tmpl));
6260                 tmpl.start = bytenr;
6261                 tmpl.nr = 1;
6262                 tmpl.max_size = max_size;
6263
6264                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6265                 if (ret)
6266                         return ret;
6267
6268                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6269                 if (!cache)
6270                         abort();
6271         }
6272
6273         rec = container_of(cache, struct extent_record, cache);
6274         if (rec->max_size < max_size)
6275                 rec->max_size = max_size;
6276
6277         /*
6278          * If found_ref is set then max_size is the real size and must match the
6279          * existing refs.  So if we have already found a ref then we need to
6280          * make sure that this ref matches the existing one, otherwise we need
6281          * to add a new backref so we can notice that the backrefs don't match
6282          * and we need to figure out who is telling the truth.  This is to
6283          * account for that awful fsync bug I introduced where we'd end up with
6284          * a btrfs_file_extent_item that would have its length include multiple
6285          * prealloc extents or point inside of a prealloc extent.
6286          */
6287         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6288                                  bytenr, max_size);
6289         if (!back) {
6290                 back = alloc_data_backref(rec, parent, root, owner, offset,
6291                                           max_size);
6292                 BUG_ON(!back);
6293         }
6294
6295         if (found_ref) {
6296                 BUG_ON(num_refs != 1);
6297                 if (back->node.found_ref)
6298                         BUG_ON(back->bytes != max_size);
6299                 back->node.found_ref = 1;
6300                 back->found_ref += 1;
6301                 back->bytes = max_size;
6302                 back->disk_bytenr = bytenr;
6303                 rec->refs += 1;
6304                 rec->content_checked = 1;
6305                 rec->owner_ref_checked = 1;
6306         } else {
6307                 if (back->node.found_extent_tree) {
6308                         fprintf(stderr, "Extent back ref already exists "
6309                                 "for %llu parent %llu root %llu "
6310                                 "owner %llu offset %llu num_refs %lu\n",
6311                                 (unsigned long long)bytenr,
6312                                 (unsigned long long)parent,
6313                                 (unsigned long long)root,
6314                                 (unsigned long long)owner,
6315                                 (unsigned long long)offset,
6316                                 (unsigned long)num_refs);
6317                 }
6318                 back->num_refs = num_refs;
6319                 back->node.found_extent_tree = 1;
6320         }
6321         maybe_free_extent_rec(extent_cache, rec);
6322         return 0;
6323 }
6324
6325 static int add_pending(struct cache_tree *pending,
6326                        struct cache_tree *seen, u64 bytenr, u32 size)
6327 {
6328         int ret;
6329         ret = add_cache_extent(seen, bytenr, size);
6330         if (ret)
6331                 return ret;
6332         add_cache_extent(pending, bytenr, size);
6333         return 0;
6334 }
6335
6336 static int pick_next_pending(struct cache_tree *pending,
6337                         struct cache_tree *reada,
6338                         struct cache_tree *nodes,
6339                         u64 last, struct block_info *bits, int bits_nr,
6340                         int *reada_bits)
6341 {
6342         unsigned long node_start = last;
6343         struct cache_extent *cache;
6344         int ret;
6345
6346         cache = search_cache_extent(reada, 0);
6347         if (cache) {
6348                 bits[0].start = cache->start;
6349                 bits[0].size = cache->size;
6350                 *reada_bits = 1;
6351                 return 1;
6352         }
6353         *reada_bits = 0;
6354         if (node_start > 32768)
6355                 node_start -= 32768;
6356
6357         cache = search_cache_extent(nodes, node_start);
6358         if (!cache)
6359                 cache = search_cache_extent(nodes, 0);
6360
6361         if (!cache) {
6362                  cache = search_cache_extent(pending, 0);
6363                  if (!cache)
6364                          return 0;
6365                  ret = 0;
6366                  do {
6367                          bits[ret].start = cache->start;
6368                          bits[ret].size = cache->size;
6369                          cache = next_cache_extent(cache);
6370                          ret++;
6371                  } while (cache && ret < bits_nr);
6372                  return ret;
6373         }
6374
6375         ret = 0;
6376         do {
6377                 bits[ret].start = cache->start;
6378                 bits[ret].size = cache->size;
6379                 cache = next_cache_extent(cache);
6380                 ret++;
6381         } while (cache && ret < bits_nr);
6382
6383         if (bits_nr - ret > 8) {
6384                 u64 lookup = bits[0].start + bits[0].size;
6385                 struct cache_extent *next;
6386                 next = search_cache_extent(pending, lookup);
6387                 while(next) {
6388                         if (next->start - lookup > 32768)
6389                                 break;
6390                         bits[ret].start = next->start;
6391                         bits[ret].size = next->size;
6392                         lookup = next->start + next->size;
6393                         ret++;
6394                         if (ret == bits_nr)
6395                                 break;
6396                         next = next_cache_extent(next);
6397                         if (!next)
6398                                 break;
6399                 }
6400         }
6401         return ret;
6402 }
6403
6404 static void free_chunk_record(struct cache_extent *cache)
6405 {
6406         struct chunk_record *rec;
6407
6408         rec = container_of(cache, struct chunk_record, cache);
6409         list_del_init(&rec->list);
6410         list_del_init(&rec->dextents);
6411         free(rec);
6412 }
6413
6414 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6415 {
6416         cache_tree_free_extents(chunk_cache, free_chunk_record);
6417 }
6418
6419 static void free_device_record(struct rb_node *node)
6420 {
6421         struct device_record *rec;
6422
6423         rec = container_of(node, struct device_record, node);
6424         free(rec);
6425 }
6426
6427 FREE_RB_BASED_TREE(device_cache, free_device_record);
6428
6429 int insert_block_group_record(struct block_group_tree *tree,
6430                               struct block_group_record *bg_rec)
6431 {
6432         int ret;
6433
6434         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6435         if (ret)
6436                 return ret;
6437
6438         list_add_tail(&bg_rec->list, &tree->block_groups);
6439         return 0;
6440 }
6441
6442 static void free_block_group_record(struct cache_extent *cache)
6443 {
6444         struct block_group_record *rec;
6445
6446         rec = container_of(cache, struct block_group_record, cache);
6447         list_del_init(&rec->list);
6448         free(rec);
6449 }
6450
6451 void free_block_group_tree(struct block_group_tree *tree)
6452 {
6453         cache_tree_free_extents(&tree->tree, free_block_group_record);
6454 }
6455
6456 int insert_device_extent_record(struct device_extent_tree *tree,
6457                                 struct device_extent_record *de_rec)
6458 {
6459         int ret;
6460
6461         /*
6462          * Device extent is a bit different from the other extents, because
6463          * the extents which belong to the different devices may have the
6464          * same start and size, so we need use the special extent cache
6465          * search/insert functions.
6466          */
6467         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6468         if (ret)
6469                 return ret;
6470
6471         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6472         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6473         return 0;
6474 }
6475
6476 static void free_device_extent_record(struct cache_extent *cache)
6477 {
6478         struct device_extent_record *rec;
6479
6480         rec = container_of(cache, struct device_extent_record, cache);
6481         if (!list_empty(&rec->chunk_list))
6482                 list_del_init(&rec->chunk_list);
6483         if (!list_empty(&rec->device_list))
6484                 list_del_init(&rec->device_list);
6485         free(rec);
6486 }
6487
6488 void free_device_extent_tree(struct device_extent_tree *tree)
6489 {
6490         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6491 }
6492
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref of the extent
 * at key.objectid, with key.offset passed as the parent.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID yields a tree backref,
 * anything else a data backref carrying the ref count stored in the item.
 * Returns the result of the add_*_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
				key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6513
6514 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6515                                             struct btrfs_key *key,
6516                                             int slot)
6517 {
6518         struct btrfs_chunk *ptr;
6519         struct chunk_record *rec;
6520         int num_stripes, i;
6521
6522         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6523         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6524
6525         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6526         if (!rec) {
6527                 fprintf(stderr, "memory allocation failed\n");
6528                 exit(-1);
6529         }
6530
6531         INIT_LIST_HEAD(&rec->list);
6532         INIT_LIST_HEAD(&rec->dextents);
6533         rec->bg_rec = NULL;
6534
6535         rec->cache.start = key->offset;
6536         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6537
6538         rec->generation = btrfs_header_generation(leaf);
6539
6540         rec->objectid = key->objectid;
6541         rec->type = key->type;
6542         rec->offset = key->offset;
6543
6544         rec->length = rec->cache.size;
6545         rec->owner = btrfs_chunk_owner(leaf, ptr);
6546         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6547         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6548         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6549         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6550         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6551         rec->num_stripes = num_stripes;
6552         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6553
6554         for (i = 0; i < rec->num_stripes; ++i) {
6555                 rec->stripes[i].devid =
6556                         btrfs_stripe_devid_nr(leaf, ptr, i);
6557                 rec->stripes[i].offset =
6558                         btrfs_stripe_offset_nr(leaf, ptr, i);
6559                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6560                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6561                                 BTRFS_UUID_SIZE);
6562         }
6563
6564         return rec;
6565 }
6566
6567 static int process_chunk_item(struct cache_tree *chunk_cache,
6568                               struct btrfs_key *key, struct extent_buffer *eb,
6569                               int slot)
6570 {
6571         struct chunk_record *rec;
6572         struct btrfs_chunk *chunk;
6573         int ret = 0;
6574
6575         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6576         /*
6577          * Do extra check for this chunk item,
6578          *
6579          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6580          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6581          * and owner<->key_type check.
6582          */
6583         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6584                                       key->offset);
6585         if (ret < 0) {
6586                 error("chunk(%llu, %llu) is not valid, ignore it",
6587                       key->offset, btrfs_chunk_length(eb, chunk));
6588                 return 0;
6589         }
6590         rec = btrfs_new_chunk_record(eb, key, slot);
6591         ret = insert_cache_extent(chunk_cache, &rec->cache);
6592         if (ret) {
6593                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6594                         rec->offset, rec->length);
6595                 free(rec);
6596         }
6597
6598         return ret;
6599 }
6600
6601 static int process_device_item(struct rb_root *dev_cache,
6602                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6603 {
6604         struct btrfs_dev_item *ptr;
6605         struct device_record *rec;
6606         int ret = 0;
6607
6608         ptr = btrfs_item_ptr(eb,
6609                 slot, struct btrfs_dev_item);
6610
6611         rec = malloc(sizeof(*rec));
6612         if (!rec) {
6613                 fprintf(stderr, "memory allocation failed\n");
6614                 return -ENOMEM;
6615         }
6616
6617         rec->devid = key->offset;
6618         rec->generation = btrfs_header_generation(eb);
6619
6620         rec->objectid = key->objectid;
6621         rec->type = key->type;
6622         rec->offset = key->offset;
6623
6624         rec->devid = btrfs_device_id(eb, ptr);
6625         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6626         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6627
6628         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6629         if (ret) {
6630                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6631                 free(rec);
6632         }
6633
6634         return ret;
6635 }
6636
6637 struct block_group_record *
6638 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6639                              int slot)
6640 {
6641         struct btrfs_block_group_item *ptr;
6642         struct block_group_record *rec;
6643
6644         rec = calloc(1, sizeof(*rec));
6645         if (!rec) {
6646                 fprintf(stderr, "memory allocation failed\n");
6647                 exit(-1);
6648         }
6649
6650         rec->cache.start = key->objectid;
6651         rec->cache.size = key->offset;
6652
6653         rec->generation = btrfs_header_generation(leaf);
6654
6655         rec->objectid = key->objectid;
6656         rec->type = key->type;
6657         rec->offset = key->offset;
6658
6659         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6660         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6661
6662         INIT_LIST_HEAD(&rec->list);
6663
6664         return rec;
6665 }
6666
6667 static int process_block_group_item(struct block_group_tree *block_group_cache,
6668                                     struct btrfs_key *key,
6669                                     struct extent_buffer *eb, int slot)
6670 {
6671         struct block_group_record *rec;
6672         int ret = 0;
6673
6674         rec = btrfs_new_block_group_record(eb, key, slot);
6675         ret = insert_block_group_record(block_group_cache, rec);
6676         if (ret) {
6677                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6678                         rec->objectid, rec->offset);
6679                 free(rec);
6680         }
6681
6682         return ret;
6683 }
6684
6685 struct device_extent_record *
6686 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6687                                struct btrfs_key *key, int slot)
6688 {
6689         struct device_extent_record *rec;
6690         struct btrfs_dev_extent *ptr;
6691
6692         rec = calloc(1, sizeof(*rec));
6693         if (!rec) {
6694                 fprintf(stderr, "memory allocation failed\n");
6695                 exit(-1);
6696         }
6697
6698         rec->cache.objectid = key->objectid;
6699         rec->cache.start = key->offset;
6700
6701         rec->generation = btrfs_header_generation(leaf);
6702
6703         rec->objectid = key->objectid;
6704         rec->type = key->type;
6705         rec->offset = key->offset;
6706
6707         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6708         rec->chunk_objecteid =
6709                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6710         rec->chunk_offset =
6711                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6712         rec->length = btrfs_dev_extent_length(leaf, ptr);
6713         rec->cache.size = rec->length;
6714
6715         INIT_LIST_HEAD(&rec->chunk_list);
6716         INIT_LIST_HEAD(&rec->device_list);
6717
6718         return rec;
6719 }
6720
6721 static int
6722 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6723                            struct btrfs_key *key, struct extent_buffer *eb,
6724                            int slot)
6725 {
6726         struct device_extent_record *rec;
6727         int ret;
6728
6729         rec = btrfs_new_device_extent_record(eb, key, slot);
6730         ret = insert_device_extent_record(dev_extent_cache, rec);
6731         if (ret) {
6732                 fprintf(stderr,
6733                         "Device extent[%llu, %llu, %llu] existed.\n",
6734                         rec->objectid, rec->offset, rec->length);
6735                 free(rec);
6736         }
6737
6738         return ret;
6739 }
6740
6741 static int process_extent_item(struct btrfs_root *root,
6742                                struct cache_tree *extent_cache,
6743                                struct extent_buffer *eb, int slot)
6744 {
6745         struct btrfs_extent_item *ei;
6746         struct btrfs_extent_inline_ref *iref;
6747         struct btrfs_extent_data_ref *dref;
6748         struct btrfs_shared_data_ref *sref;
6749         struct btrfs_key key;
6750         struct extent_record tmpl;
6751         unsigned long end;
6752         unsigned long ptr;
6753         int ret;
6754         int type;
6755         u32 item_size = btrfs_item_size_nr(eb, slot);
6756         u64 refs = 0;
6757         u64 offset;
6758         u64 num_bytes;
6759         int metadata = 0;
6760
6761         btrfs_item_key_to_cpu(eb, &key, slot);
6762
6763         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6764                 metadata = 1;
6765                 num_bytes = root->nodesize;
6766         } else {
6767                 num_bytes = key.offset;
6768         }
6769
6770         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6771                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6772                       key.objectid, root->sectorsize);
6773                 return -EIO;
6774         }
6775         if (item_size < sizeof(*ei)) {
6776 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6777                 struct btrfs_extent_item_v0 *ei0;
6778                 BUG_ON(item_size != sizeof(*ei0));
6779                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6780                 refs = btrfs_extent_refs_v0(eb, ei0);
6781 #else
6782                 BUG();
6783 #endif
6784                 memset(&tmpl, 0, sizeof(tmpl));
6785                 tmpl.start = key.objectid;
6786                 tmpl.nr = num_bytes;
6787                 tmpl.extent_item_refs = refs;
6788                 tmpl.metadata = metadata;
6789                 tmpl.found_rec = 1;
6790                 tmpl.max_size = num_bytes;
6791
6792                 return add_extent_rec(extent_cache, &tmpl);
6793         }
6794
6795         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6796         refs = btrfs_extent_refs(eb, ei);
6797         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6798                 metadata = 1;
6799         else
6800                 metadata = 0;
6801         if (metadata && num_bytes != root->nodesize) {
6802                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6803                       num_bytes, root->nodesize);
6804                 return -EIO;
6805         }
6806         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6807                 error("ignore invalid data extent, length %llu is not aligned to %u",
6808                       num_bytes, root->sectorsize);
6809                 return -EIO;
6810         }
6811
6812         memset(&tmpl, 0, sizeof(tmpl));
6813         tmpl.start = key.objectid;
6814         tmpl.nr = num_bytes;
6815         tmpl.extent_item_refs = refs;
6816         tmpl.metadata = metadata;
6817         tmpl.found_rec = 1;
6818         tmpl.max_size = num_bytes;
6819         add_extent_rec(extent_cache, &tmpl);
6820
6821         ptr = (unsigned long)(ei + 1);
6822         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6823             key.type == BTRFS_EXTENT_ITEM_KEY)
6824                 ptr += sizeof(struct btrfs_tree_block_info);
6825
6826         end = (unsigned long)ei + item_size;
6827         while (ptr < end) {
6828                 iref = (struct btrfs_extent_inline_ref *)ptr;
6829                 type = btrfs_extent_inline_ref_type(eb, iref);
6830                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6831                 switch (type) {
6832                 case BTRFS_TREE_BLOCK_REF_KEY:
6833                         ret = add_tree_backref(extent_cache, key.objectid,
6834                                         0, offset, 0);
6835                         if (ret < 0)
6836                                 error("add_tree_backref failed: %s",
6837                                       strerror(-ret));
6838                         break;
6839                 case BTRFS_SHARED_BLOCK_REF_KEY:
6840                         ret = add_tree_backref(extent_cache, key.objectid,
6841                                         offset, 0, 0);
6842                         if (ret < 0)
6843                                 error("add_tree_backref failed: %s",
6844                                       strerror(-ret));
6845                         break;
6846                 case BTRFS_EXTENT_DATA_REF_KEY:
6847                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6848                         add_data_backref(extent_cache, key.objectid, 0,
6849                                         btrfs_extent_data_ref_root(eb, dref),
6850                                         btrfs_extent_data_ref_objectid(eb,
6851                                                                        dref),
6852                                         btrfs_extent_data_ref_offset(eb, dref),
6853                                         btrfs_extent_data_ref_count(eb, dref),
6854                                         0, num_bytes);
6855                         break;
6856                 case BTRFS_SHARED_DATA_REF_KEY:
6857                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6858                         add_data_backref(extent_cache, key.objectid, offset,
6859                                         0, 0, 0,
6860                                         btrfs_shared_data_ref_count(eb, sref),
6861                                         0, num_bytes);
6862                         break;
6863                 default:
6864                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6865                                 key.objectid, key.type, num_bytes);
6866                         goto out;
6867                 }
6868                 ptr += btrfs_extent_inline_ref_size(type);
6869         }
6870         WARN_ON(ptr > end);
6871 out:
6872         return 0;
6873 }
6874
/*
 * Check that the range [offset, offset + bytes) of block group @cache is
 * described by exactly one entry in the block group's free space ctl, and
 * remove that entry once it has been matched.
 *
 * Before the lookup the range is trimmed against the logical addresses that
 * btrfs_rmap_block() reports for each superblock mirror offset.  Every
 * reported address is treated as the range
 * [logical[nr], logical[nr] + stripe_len) and is carved out of the range
 * being checked.
 *
 * Returns 0 if the (trimmed) range matches a free space entry or is entirely
 * covered by one of the reported stripes, -EINVAL if no entry is found or the
 * entry's offset/length differ, or the non-zero result of btrfs_rmap_block().
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                /* Walk the reported addresses from the last one to the first */
                while (nr--) {
                        /* Stripe ends at or before our range: no overlap */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        /* Stripe starts at or after our end: no overlap */
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* Stripe covers the whole range, nothing left */
                                if (stripe_len >= bytes) {
                                        free(logical);
                                        return 0;
                                }
                                /* Drop the stripe from the front of the range */
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /*
                                 * Stripe starts before the range; if it also
                                 * reaches the end there is nothing to check.
                                 */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        free(logical);
                                        return 0;
                                }
                                /* Otherwise continue after the stripe's end */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        /* Cut the tail off at the stripe start */
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        free(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                /* The address array filled in by btrfs_rmap_block() is ours */
                free(logical);
        }

        /* What is left of the range must match one entry exactly */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /* Matched: take the entry out of the ctl and release it */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
6968
6969 static int verify_space_cache(struct btrfs_root *root,
6970                               struct btrfs_block_group_cache *cache)
6971 {
6972         struct btrfs_path path;
6973         struct extent_buffer *leaf;
6974         struct btrfs_key key;
6975         u64 last;
6976         int ret = 0;
6977
6978         root = root->fs_info->extent_root;
6979
6980         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6981
6982         btrfs_init_path(&path);
6983         key.objectid = last;
6984         key.offset = 0;
6985         key.type = BTRFS_EXTENT_ITEM_KEY;
6986         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6987         if (ret < 0)
6988                 goto out;
6989         ret = 0;
6990         while (1) {
6991                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6992                         ret = btrfs_next_leaf(root, &path);
6993                         if (ret < 0)
6994                                 goto out;
6995                         if (ret > 0) {
6996                                 ret = 0;
6997                                 break;
6998                         }
6999                 }
7000                 leaf = path.nodes[0];
7001                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7002                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7003                         break;
7004                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7005                     key.type != BTRFS_METADATA_ITEM_KEY) {
7006                         path.slots[0]++;
7007                         continue;
7008                 }
7009
7010                 if (last == key.objectid) {
7011                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7012                                 last = key.objectid + key.offset;
7013                         else
7014                                 last = key.objectid + root->nodesize;
7015                         path.slots[0]++;
7016                         continue;
7017                 }
7018
7019                 ret = check_cache_range(root, cache, last,
7020                                         key.objectid - last);
7021                 if (ret)
7022                         break;
7023                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7024                         last = key.objectid + key.offset;
7025                 else
7026                         last = key.objectid + root->nodesize;
7027                 path.slots[0]++;
7028         }
7029
7030         if (last < cache->key.objectid + cache->key.offset)
7031                 ret = check_cache_range(root, cache, last,
7032                                         cache->key.objectid +
7033                                         cache->key.offset - last);
7034
7035 out:
7036         btrfs_release_path(&path);
7037
7038         if (!ret &&
7039             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7040                 fprintf(stderr, "There are still entries left in the space "
7041                         "cache\n");
7042                 ret = -EINVAL;
7043         }
7044
7045         return ret;
7046 }
7047
7048 static int check_space_cache(struct btrfs_root *root)
7049 {
7050         struct btrfs_block_group_cache *cache;
7051         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7052         int ret;
7053         int error = 0;
7054
7055         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7056             btrfs_super_generation(root->fs_info->super_copy) !=
7057             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7058                 printf("cache and super generation don't match, space cache "
7059                        "will be invalidated\n");
7060                 return 0;
7061         }
7062
7063         if (ctx.progress_enabled) {
7064                 ctx.tp = TASK_FREE_SPACE;
7065                 task_start(ctx.info);
7066         }
7067
7068         while (1) {
7069                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7070                 if (!cache)
7071                         break;
7072
7073                 start = cache->key.objectid + cache->key.offset;
7074                 if (!cache->free_space_ctl) {
7075                         if (btrfs_init_free_space_ctl(cache,
7076                                                       root->sectorsize)) {
7077                                 ret = -ENOMEM;
7078                                 break;
7079                         }
7080                 } else {
7081                         btrfs_remove_free_space_cache(cache);
7082                 }
7083
7084                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7085                         ret = exclude_super_stripes(root, cache);
7086                         if (ret) {
7087                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7088                                         strerror(-ret));
7089                                 error++;
7090                                 continue;
7091                         }
7092                         ret = load_free_space_tree(root->fs_info, cache);
7093                         free_excluded_extents(root, cache);
7094                         if (ret < 0) {
7095                                 fprintf(stderr, "could not load free space tree: %s\n",
7096                                         strerror(-ret));
7097                                 error++;
7098                                 continue;
7099                         }
7100                         error += ret;
7101                 } else {
7102                         ret = load_free_space_cache(root->fs_info, cache);
7103                         if (!ret)
7104                                 continue;
7105                 }
7106
7107                 ret = verify_space_cache(root, cache);
7108                 if (ret) {
7109                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7110                                 cache->key.objectid);
7111                         error++;
7112                 }
7113         }
7114
7115         task_stop(ctx.info);
7116
7117         return error ? -EINVAL : 0;
7118 }
7119
7120 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7121                         u64 num_bytes, unsigned long leaf_offset,
7122                         struct extent_buffer *eb) {
7123
7124         u64 offset = 0;
7125         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7126         char *data;
7127         unsigned long csum_offset;
7128         u32 csum;
7129         u32 csum_expected;
7130         u64 read_len;
7131         u64 data_checked = 0;
7132         u64 tmp;
7133         int ret = 0;
7134         int mirror;
7135         int num_copies;
7136
7137         if (num_bytes % root->sectorsize)
7138                 return -EINVAL;
7139
7140         data = malloc(num_bytes);
7141         if (!data)
7142                 return -ENOMEM;
7143
7144         while (offset < num_bytes) {
7145                 mirror = 0;
7146 again:
7147                 read_len = num_bytes - offset;
7148                 /* read as much space once a time */
7149                 ret = read_extent_data(root, data + offset,
7150                                 bytenr + offset, &read_len, mirror);
7151                 if (ret)
7152                         goto out;
7153                 data_checked = 0;
7154                 /* verify every 4k data's checksum */
7155                 while (data_checked < read_len) {
7156                         csum = ~(u32)0;
7157                         tmp = offset + data_checked;
7158
7159                         csum = btrfs_csum_data((char *)data + tmp,
7160                                                csum, root->sectorsize);
7161                         btrfs_csum_final(csum, (u8 *)&csum);
7162
7163                         csum_offset = leaf_offset +
7164                                  tmp / root->sectorsize * csum_size;
7165                         read_extent_buffer(eb, (char *)&csum_expected,
7166                                            csum_offset, csum_size);
7167                         /* try another mirror */
7168                         if (csum != csum_expected) {
7169                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7170                                                 mirror, bytenr + tmp,
7171                                                 csum, csum_expected);
7172                                 num_copies = btrfs_num_copies(
7173                                                 &root->fs_info->mapping_tree,
7174                                                 bytenr, num_bytes);
7175                                 if (mirror < num_copies - 1) {
7176                                         mirror += 1;
7177                                         goto again;
7178                                 }
7179                         }
7180                         data_checked += root->sectorsize;
7181                 }
7182                 offset += read_len;
7183         }
7184 out:
7185         free(data);
7186         return ret;
7187 }
7188
/*
 * Check that the byte range [bytenr, bytenr + num_bytes) is covered by
 * BTRFS_EXTENT_ITEM_KEY items in the extent tree.
 *
 * The search starts at the last item at or before @bytenr and walks forward,
 * shrinking the range by every extent item that overlaps it.  Only
 * BTRFS_EXTENT_ITEM_KEY items are considered; all other key types are
 * skipped.
 *
 * Returns 0 if the whole range is covered, 1 if some part of it has no extent
 * item (a message is printed), or a negative value if a tree operation
 * failed.
 */
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes)
{
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        int ret;

        btrfs_init_path(&path);
        /* Largest possible extent key for this bytenr */
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

again:
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
                                0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error looking up extent record %d\n", ret);
                btrfs_release_path(&path);
                return ret;
        } else if (ret) {
                /* No exact match: step back to the preceding item */
                if (path.slots[0] > 0) {
                        path.slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, &path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                /*
                                 * No previous leaf; with num_bytes still set
                                 * the out: path reports the range as missing.
                                 */
                                ret = 0;
                                goto out;
                        }
                }
        }

        btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);

        /*
         * Block group items come before extent items if they have the same
         * bytenr, so walk back one more just in case.  Dear future traveller,
         * first congrats on mastering time travel.  Now if it's not too much
         * trouble could you go back to 2006 and tell Chris to make the
         * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
         * EXTENT_ITEM_KEY please?
         */
        while (key.type > BTRFS_EXTENT_ITEM_KEY) {
                if (path.slots[0] > 0) {
                        path.slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, &path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                ret = 0;
                                goto out;
                        }
                }
                btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
        }

        /* Walk forward until the range is used up or we are past its end */
        while (num_bytes) {
                if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                        ret = btrfs_next_leaf(root, &path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf "
                                        "%d\n", ret);
                                btrfs_release_path(&path);
                                return ret;
                        } else if (ret) {
                                /* End of the tree */
                                break;
                        }
                }
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                if (key.type != BTRFS_EXTENT_ITEM_KEY) {
                        path.slots[0]++;
                        continue;
                }
                /* Extent ends before the range starts */
                if (key.objectid + key.offset < bytenr) {
                        path.slots[0]++;
                        continue;
                }
                /* Extent starts after the range ends */
                if (key.objectid > bytenr + num_bytes)
                        break;

                if (key.objectid == bytenr) {
                        /* Extent starts at the range start */
                        if (key.offset >= num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes -= key.offset;
                        bytenr += key.offset;
                } else if (key.objectid < bytenr) {
                        /* Extent starts before the range start */
                        if (key.objectid + key.offset >= bytenr + num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes = (bytenr + num_bytes) -
                                (key.objectid + key.offset);
                        bytenr = key.objectid + key.offset;
                } else {
                        /* Extent starts inside the range */
                        if (key.objectid + key.offset < bytenr + num_bytes) {
                                u64 new_start = key.objectid + key.offset;
                                u64 new_bytes = bytenr + num_bytes - new_start;

                                /*
                                 * Weird case, the extent is in the middle of
                                 * our range, we'll have to search one side
                                 * and then the other.  Not sure if this happens
                                 * in real life, but no harm in coding it up
                                 * anyway just in case.
                                 */
                                btrfs_release_path(&path);
                                ret = check_extent_exists(root, new_start,
                                                          new_bytes);
                                if (ret) {
                                        /*
                                         * ret is reset to 0 after the loop;
                                         * the non-zero num_bytes then yields
                                         * the return value 1 at out:.
                                         */
                                        fprintf(stderr, "Right section didn't "
                                                "have a record\n");
                                        break;
                                }
                                /*
                                 * Keep the left part and search again.  Note
                                 * that key now holds the extent just read,
                                 * not the initial search key.
                                 */
                                num_bytes = key.objectid - bytenr;
                                goto again;
                        }
                        /* Extent reaches the range end: keep the left part */
                        num_bytes = key.objectid - bytenr;
                }
                path.slots[0]++;
        }
        ret = 0;

out:
        /* Anything left in num_bytes was not covered by an extent item */
        if (num_bytes && !ret) {
                fprintf(stderr, "There are no extents for csum range "
                        "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
                ret = 1;
        }

        btrfs_release_path(&path);
        return ret;
}
7327
7328 static int check_csums(struct btrfs_root *root)
7329 {
7330         struct btrfs_path path;
7331         struct extent_buffer *leaf;
7332         struct btrfs_key key;
7333         u64 offset = 0, num_bytes = 0;
7334         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7335         int errors = 0;
7336         int ret;
7337         u64 data_len;
7338         unsigned long leaf_offset;
7339
7340         root = root->fs_info->csum_root;
7341         if (!extent_buffer_uptodate(root->node)) {
7342                 fprintf(stderr, "No valid csum tree found\n");
7343                 return -ENOENT;
7344         }
7345
7346         btrfs_init_path(&path);
7347         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7348         key.type = BTRFS_EXTENT_CSUM_KEY;
7349         key.offset = 0;
7350         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7351         if (ret < 0) {
7352                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7353                 btrfs_release_path(&path);
7354                 return ret;
7355         }
7356
7357         if (ret > 0 && path.slots[0])
7358                 path.slots[0]--;
7359         ret = 0;
7360
7361         while (1) {
7362                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7363                         ret = btrfs_next_leaf(root, &path);
7364                         if (ret < 0) {
7365                                 fprintf(stderr, "Error going to next leaf "
7366                                         "%d\n", ret);
7367                                 break;
7368                         }
7369                         if (ret)
7370                                 break;
7371                 }
7372                 leaf = path.nodes[0];
7373
7374                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7375                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7376                         path.slots[0]++;
7377                         continue;
7378                 }
7379
7380                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7381                               csum_size) * root->sectorsize;
7382                 if (!check_data_csum)
7383                         goto skip_csum_check;
7384                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7385                 ret = check_extent_csums(root, key.offset, data_len,
7386                                          leaf_offset, leaf);
7387                 if (ret)
7388                         break;
7389 skip_csum_check:
7390                 if (!num_bytes) {
7391                         offset = key.offset;
7392                 } else if (key.offset != offset + num_bytes) {
7393                         ret = check_extent_exists(root, offset, num_bytes);
7394                         if (ret) {
7395                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7396                                         "there is no extent record\n",
7397                                         offset, offset+num_bytes);
7398                                 errors++;
7399                         }
7400                         offset = key.offset;
7401                         num_bytes = 0;
7402                 }
7403                 num_bytes += data_len;
7404                 path.slots[0]++;
7405         }
7406
7407         btrfs_release_path(&path);
7408         return errors;
7409 }
7410
7411 static int is_dropped_key(struct btrfs_key *key,
7412                           struct btrfs_key *drop_key) {
7413         if (key->objectid < drop_key->objectid)
7414                 return 1;
7415         else if (key->objectid == drop_key->objectid) {
7416                 if (key->type < drop_key->type)
7417                         return 1;
7418                 else if (key->type == drop_key->type) {
7419                         if (key->offset < drop_key->offset)
7420                                 return 1;
7421                 }
7422         }
7423         return 0;
7424 }
7425
7426 /*
7427  * Here are the rules for FULL_BACKREF.
7428  *
7429  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7430  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7431  *      FULL_BACKREF set.
7432  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7433  *    if it happened after the relocation occurred since we'll have dropped the
7434  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7435  *    have no real way to know for sure.
7436  *
7437  * We process the blocks one root at a time, and we start from the lowest root
7438  * objectid and go to the highest.  So we can just lookup the owner backref for
7439  * the record and if we don't find it then we know it doesn't exist and we have
7440  * a FULL BACKREF.
7441  *
7442  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7443  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7444  * be set or not and then we can check later once we've gathered all the refs.
7445  */
7446 static int calc_extent_flag(struct cache_tree *extent_cache,
7447                            struct extent_buffer *buf,
7448                            struct root_item_record *ri,
7449                            u64 *flags)
7450 {
7451         struct extent_record *rec;
7452         struct cache_extent *cache;
7453         struct tree_backref *tback;
7454         u64 owner = 0;
7455
7456         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7457         /* we have added this extent before */
7458         if (!cache)
7459                 return -ENOENT;
7460
7461         rec = container_of(cache, struct extent_record, cache);
7462
7463         /*
7464          * Except file/reloc tree, we can not have
7465          * FULL BACKREF MODE
7466          */
7467         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7468                 goto normal;
7469         /*
7470          * root node
7471          */
7472         if (buf->start == ri->bytenr)
7473                 goto normal;
7474
7475         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7476                 goto full_backref;
7477
7478         owner = btrfs_header_owner(buf);
7479         if (owner == ri->objectid)
7480                 goto normal;
7481
7482         tback = find_tree_backref(rec, 0, owner);
7483         if (!tback)
7484                 goto full_backref;
7485 normal:
7486         *flags = 0;
7487         if (rec->flag_block_full_backref != FLAG_UNSET &&
7488             rec->flag_block_full_backref != 0)
7489                 rec->bad_full_backref = 1;
7490         return 0;
7491 full_backref:
7492         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7493         if (rec->flag_block_full_backref != FLAG_UNSET &&
7494             rec->flag_block_full_backref != 1)
7495                 rec->bad_full_backref = 1;
7496         return 0;
7497 }
7498
7499 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7500 {
7501         fprintf(stderr, "Invalid key type(");
7502         print_key_type(stderr, 0, key_type);
7503         fprintf(stderr, ") found in root(");
7504         print_objectid(stderr, rootid, 0);
7505         fprintf(stderr, ")\n");
7506 }
7507
7508 /*
7509  * Check if the key is valid with its extent buffer.
7510  *
7511  * This is a early check in case invalid key exists in a extent buffer
7512  * This is not comprehensive yet, but should prevent wrong key/item passed
7513  * further
7514  */
7515 static int check_type_with_root(u64 rootid, u8 key_type)
7516 {
7517         switch (key_type) {
7518         /* Only valid in chunk tree */
7519         case BTRFS_DEV_ITEM_KEY:
7520         case BTRFS_CHUNK_ITEM_KEY:
7521                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7522                         goto err;
7523                 break;
7524         /* valid in csum and log tree */
7525         case BTRFS_CSUM_TREE_OBJECTID:
7526                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7527                       is_fstree(rootid)))
7528                         goto err;
7529                 break;
7530         case BTRFS_EXTENT_ITEM_KEY:
7531         case BTRFS_METADATA_ITEM_KEY:
7532         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7533                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7534                         goto err;
7535                 break;
7536         case BTRFS_ROOT_ITEM_KEY:
7537                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7538                         goto err;
7539                 break;
7540         case BTRFS_DEV_EXTENT_KEY:
7541                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7542                         goto err;
7543                 break;
7544         }
7545         return 0;
7546 err:
7547         report_mismatch_key_root(key_type, rootid);
7548         return -EINVAL;
7549 }
7550
7551 static int run_next_block(struct btrfs_root *root,
7552                           struct block_info *bits,
7553                           int bits_nr,
7554                           u64 *last,
7555                           struct cache_tree *pending,
7556                           struct cache_tree *seen,
7557                           struct cache_tree *reada,
7558                           struct cache_tree *nodes,
7559                           struct cache_tree *extent_cache,
7560                           struct cache_tree *chunk_cache,
7561                           struct rb_root *dev_cache,
7562                           struct block_group_tree *block_group_cache,
7563                           struct device_extent_tree *dev_extent_cache,
7564                           struct root_item_record *ri)
7565 {
7566         struct extent_buffer *buf;
7567         struct extent_record *rec = NULL;
7568         u64 bytenr;
7569         u32 size;
7570         u64 parent;
7571         u64 owner;
7572         u64 flags;
7573         u64 ptr;
7574         u64 gen = 0;
7575         int ret = 0;
7576         int i;
7577         int nritems;
7578         struct btrfs_key key;
7579         struct cache_extent *cache;
7580         int reada_bits;
7581
7582         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7583                                     bits_nr, &reada_bits);
7584         if (nritems == 0)
7585                 return 1;
7586
7587         if (!reada_bits) {
7588                 for(i = 0; i < nritems; i++) {
7589                         ret = add_cache_extent(reada, bits[i].start,
7590                                                bits[i].size);
7591                         if (ret == -EEXIST)
7592                                 continue;
7593
7594                         /* fixme, get the parent transid */
7595                         readahead_tree_block(root, bits[i].start,
7596                                              bits[i].size, 0);
7597                 }
7598         }
7599         *last = bits[0].start;
7600         bytenr = bits[0].start;
7601         size = bits[0].size;
7602
7603         cache = lookup_cache_extent(pending, bytenr, size);
7604         if (cache) {
7605                 remove_cache_extent(pending, cache);
7606                 free(cache);
7607         }
7608         cache = lookup_cache_extent(reada, bytenr, size);
7609         if (cache) {
7610                 remove_cache_extent(reada, cache);
7611                 free(cache);
7612         }
7613         cache = lookup_cache_extent(nodes, bytenr, size);
7614         if (cache) {
7615                 remove_cache_extent(nodes, cache);
7616                 free(cache);
7617         }
7618         cache = lookup_cache_extent(extent_cache, bytenr, size);
7619         if (cache) {
7620                 rec = container_of(cache, struct extent_record, cache);
7621                 gen = rec->parent_generation;
7622         }
7623
7624         /* fixme, get the real parent transid */
7625         buf = read_tree_block(root, bytenr, size, gen);
7626         if (!extent_buffer_uptodate(buf)) {
7627                 record_bad_block_io(root->fs_info,
7628                                     extent_cache, bytenr, size);
7629                 goto out;
7630         }
7631
7632         nritems = btrfs_header_nritems(buf);
7633
7634         flags = 0;
7635         if (!init_extent_tree) {
7636                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7637                                        btrfs_header_level(buf), 1, NULL,
7638                                        &flags);
7639                 if (ret < 0) {
7640                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7641                         if (ret < 0) {
7642                                 fprintf(stderr, "Couldn't calc extent flags\n");
7643                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7644                         }
7645                 }
7646         } else {
7647                 flags = 0;
7648                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7649                 if (ret < 0) {
7650                         fprintf(stderr, "Couldn't calc extent flags\n");
7651                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7652                 }
7653         }
7654
7655         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7656                 if (ri != NULL &&
7657                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7658                     ri->objectid == btrfs_header_owner(buf)) {
7659                         /*
7660                          * Ok we got to this block from it's original owner and
7661                          * we have FULL_BACKREF set.  Relocation can leave
7662                          * converted blocks over so this is altogether possible,
7663                          * however it's not possible if the generation > the
7664                          * last snapshot, so check for this case.
7665                          */
7666                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7667                             btrfs_header_generation(buf) > ri->last_snapshot) {
7668                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7669                                 rec->bad_full_backref = 1;
7670                         }
7671                 }
7672         } else {
7673                 if (ri != NULL &&
7674                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7675                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7676                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7677                         rec->bad_full_backref = 1;
7678                 }
7679         }
7680
7681         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7682                 rec->flag_block_full_backref = 1;
7683                 parent = bytenr;
7684                 owner = 0;
7685         } else {
7686                 rec->flag_block_full_backref = 0;
7687                 parent = 0;
7688                 owner = btrfs_header_owner(buf);
7689         }
7690
7691         ret = check_block(root, extent_cache, buf, flags);
7692         if (ret)
7693                 goto out;
7694
7695         if (btrfs_is_leaf(buf)) {
7696                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7697                 for (i = 0; i < nritems; i++) {
7698                         struct btrfs_file_extent_item *fi;
7699                         btrfs_item_key_to_cpu(buf, &key, i);
7700                         /*
7701                          * Check key type against the leaf owner.
7702                          * Could filter quite a lot of early error if
7703                          * owner is correct
7704                          */
7705                         if (check_type_with_root(btrfs_header_owner(buf),
7706                                                  key.type)) {
7707                                 fprintf(stderr, "ignoring invalid key\n");
7708                                 continue;
7709                         }
7710                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7711                                 process_extent_item(root, extent_cache, buf,
7712                                                     i);
7713                                 continue;
7714                         }
7715                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7716                                 process_extent_item(root, extent_cache, buf,
7717                                                     i);
7718                                 continue;
7719                         }
7720                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7721                                 total_csum_bytes +=
7722                                         btrfs_item_size_nr(buf, i);
7723                                 continue;
7724                         }
7725                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7726                                 process_chunk_item(chunk_cache, &key, buf, i);
7727                                 continue;
7728                         }
7729                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7730                                 process_device_item(dev_cache, &key, buf, i);
7731                                 continue;
7732                         }
7733                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7734                                 process_block_group_item(block_group_cache,
7735                                         &key, buf, i);
7736                                 continue;
7737                         }
7738                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7739                                 process_device_extent_item(dev_extent_cache,
7740                                         &key, buf, i);
7741                                 continue;
7742
7743                         }
7744                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7745 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7746                                 process_extent_ref_v0(extent_cache, buf, i);
7747 #else
7748                                 BUG();
7749 #endif
7750                                 continue;
7751                         }
7752
7753                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7754                                 ret = add_tree_backref(extent_cache,
7755                                                 key.objectid, 0, key.offset, 0);
7756                                 if (ret < 0)
7757                                         error("add_tree_backref failed: %s",
7758                                               strerror(-ret));
7759                                 continue;
7760                         }
7761                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7762                                 ret = add_tree_backref(extent_cache,
7763                                                 key.objectid, key.offset, 0, 0);
7764                                 if (ret < 0)
7765                                         error("add_tree_backref failed: %s",
7766                                               strerror(-ret));
7767                                 continue;
7768                         }
7769                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7770                                 struct btrfs_extent_data_ref *ref;
7771                                 ref = btrfs_item_ptr(buf, i,
7772                                                 struct btrfs_extent_data_ref);
7773                                 add_data_backref(extent_cache,
7774                                         key.objectid, 0,
7775                                         btrfs_extent_data_ref_root(buf, ref),
7776                                         btrfs_extent_data_ref_objectid(buf,
7777                                                                        ref),
7778                                         btrfs_extent_data_ref_offset(buf, ref),
7779                                         btrfs_extent_data_ref_count(buf, ref),
7780                                         0, root->sectorsize);
7781                                 continue;
7782                         }
7783                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7784                                 struct btrfs_shared_data_ref *ref;
7785                                 ref = btrfs_item_ptr(buf, i,
7786                                                 struct btrfs_shared_data_ref);
7787                                 add_data_backref(extent_cache,
7788                                         key.objectid, key.offset, 0, 0, 0,
7789                                         btrfs_shared_data_ref_count(buf, ref),
7790                                         0, root->sectorsize);
7791                                 continue;
7792                         }
7793                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7794                                 struct bad_item *bad;
7795
7796                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7797                                         continue;
7798                                 if (!owner)
7799                                         continue;
7800                                 bad = malloc(sizeof(struct bad_item));
7801                                 if (!bad)
7802                                         continue;
7803                                 INIT_LIST_HEAD(&bad->list);
7804                                 memcpy(&bad->key, &key,
7805                                        sizeof(struct btrfs_key));
7806                                 bad->root_id = owner;
7807                                 list_add_tail(&bad->list, &delete_items);
7808                                 continue;
7809                         }
7810                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7811                                 continue;
7812                         fi = btrfs_item_ptr(buf, i,
7813                                             struct btrfs_file_extent_item);
7814                         if (btrfs_file_extent_type(buf, fi) ==
7815                             BTRFS_FILE_EXTENT_INLINE)
7816                                 continue;
7817                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7818                                 continue;
7819
7820                         data_bytes_allocated +=
7821                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7822                         if (data_bytes_allocated < root->sectorsize) {
7823                                 abort();
7824                         }
7825                         data_bytes_referenced +=
7826                                 btrfs_file_extent_num_bytes(buf, fi);
7827                         add_data_backref(extent_cache,
7828                                 btrfs_file_extent_disk_bytenr(buf, fi),
7829                                 parent, owner, key.objectid, key.offset -
7830                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7831                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7832                 }
7833         } else {
7834                 int level;
7835                 struct btrfs_key first_key;
7836
7837                 first_key.objectid = 0;
7838
7839                 if (nritems > 0)
7840                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7841                 level = btrfs_header_level(buf);
7842                 for (i = 0; i < nritems; i++) {
7843                         struct extent_record tmpl;
7844
7845                         ptr = btrfs_node_blockptr(buf, i);
7846                         size = root->nodesize;
7847                         btrfs_node_key_to_cpu(buf, &key, i);
7848                         if (ri != NULL) {
7849                                 if ((level == ri->drop_level)
7850                                     && is_dropped_key(&key, &ri->drop_key)) {
7851                                         continue;
7852                                 }
7853                         }
7854
7855                         memset(&tmpl, 0, sizeof(tmpl));
7856                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7857                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7858                         tmpl.start = ptr;
7859                         tmpl.nr = size;
7860                         tmpl.refs = 1;
7861                         tmpl.metadata = 1;
7862                         tmpl.max_size = size;
7863                         ret = add_extent_rec(extent_cache, &tmpl);
7864                         if (ret < 0)
7865                                 goto out;
7866
7867                         ret = add_tree_backref(extent_cache, ptr, parent,
7868                                         owner, 1);
7869                         if (ret < 0) {
7870                                 error("add_tree_backref failed: %s",
7871                                       strerror(-ret));
7872                                 continue;
7873                         }
7874
7875                         if (level > 1) {
7876                                 add_pending(nodes, seen, ptr, size);
7877                         } else {
7878                                 add_pending(pending, seen, ptr, size);
7879                         }
7880                 }
7881                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7882                                       nritems) * sizeof(struct btrfs_key_ptr);
7883         }
7884         total_btree_bytes += buf->len;
7885         if (fs_root_objectid(btrfs_header_owner(buf)))
7886                 total_fs_tree_bytes += buf->len;
7887         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7888                 total_extent_tree_bytes += buf->len;
7889         if (!found_old_backref &&
7890             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7891             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7892             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7893                 found_old_backref = 1;
7894 out:
7895         free_extent_buffer(buf);
7896         return ret;
7897 }
7898
7899 static int add_root_to_pending(struct extent_buffer *buf,
7900                                struct cache_tree *extent_cache,
7901                                struct cache_tree *pending,
7902                                struct cache_tree *seen,
7903                                struct cache_tree *nodes,
7904                                u64 objectid)
7905 {
7906         struct extent_record tmpl;
7907         int ret;
7908
7909         if (btrfs_header_level(buf) > 0)
7910                 add_pending(nodes, seen, buf->start, buf->len);
7911         else
7912                 add_pending(pending, seen, buf->start, buf->len);
7913
7914         memset(&tmpl, 0, sizeof(tmpl));
7915         tmpl.start = buf->start;
7916         tmpl.nr = buf->len;
7917         tmpl.is_root = 1;
7918         tmpl.refs = 1;
7919         tmpl.metadata = 1;
7920         tmpl.max_size = buf->len;
7921         add_extent_rec(extent_cache, &tmpl);
7922
7923         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7924             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7925                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7926                                 0, 1);
7927         else
7928                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7929                                 1);
7930         return ret;
7931 }
7932
7933 /* as we fix the tree, we might be deleting blocks that
7934  * we're tracking for repair.  This hook makes sure we
7935  * remove any backrefs for blocks as we are fixing them.
7936  */
7937 static int free_extent_hook(struct btrfs_trans_handle *trans,
7938                             struct btrfs_root *root,
7939                             u64 bytenr, u64 num_bytes, u64 parent,
7940                             u64 root_objectid, u64 owner, u64 offset,
7941                             int refs_to_drop)
7942 {
7943         struct extent_record *rec;
7944         struct cache_extent *cache;
7945         int is_data;
7946         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7947
7948         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7949         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7950         if (!cache)
7951                 return 0;
7952
7953         rec = container_of(cache, struct extent_record, cache);
7954         if (is_data) {
7955                 struct data_backref *back;
7956                 back = find_data_backref(rec, parent, root_objectid, owner,
7957                                          offset, 1, bytenr, num_bytes);
7958                 if (!back)
7959                         goto out;
7960                 if (back->node.found_ref) {
7961                         back->found_ref -= refs_to_drop;
7962                         if (rec->refs)
7963                                 rec->refs -= refs_to_drop;
7964                 }
7965                 if (back->node.found_extent_tree) {
7966                         back->num_refs -= refs_to_drop;
7967                         if (rec->extent_item_refs)
7968                                 rec->extent_item_refs -= refs_to_drop;
7969                 }
7970                 if (back->found_ref == 0)
7971                         back->node.found_ref = 0;
7972                 if (back->num_refs == 0)
7973                         back->node.found_extent_tree = 0;
7974
7975                 if (!back->node.found_extent_tree && back->node.found_ref) {
7976                         list_del(&back->node.list);
7977                         free(back);
7978                 }
7979         } else {
7980                 struct tree_backref *back;
7981                 back = find_tree_backref(rec, parent, root_objectid);
7982                 if (!back)
7983                         goto out;
7984                 if (back->node.found_ref) {
7985                         if (rec->refs)
7986                                 rec->refs--;
7987                         back->node.found_ref = 0;
7988                 }
7989                 if (back->node.found_extent_tree) {
7990                         if (rec->extent_item_refs)
7991                                 rec->extent_item_refs--;
7992                         back->node.found_extent_tree = 0;
7993                 }
7994                 if (!back->node.found_extent_tree && back->node.found_ref) {
7995                         list_del(&back->node.list);
7996                         free(back);
7997                 }
7998         }
7999         maybe_free_extent_rec(extent_cache, rec);
8000 out:
8001         return 0;
8002 }
8003
8004 static int delete_extent_records(struct btrfs_trans_handle *trans,
8005                                  struct btrfs_root *root,
8006                                  struct btrfs_path *path,
8007                                  u64 bytenr)
8008 {
8009         struct btrfs_key key;
8010         struct btrfs_key found_key;
8011         struct extent_buffer *leaf;
8012         int ret;
8013         int slot;
8014
8015
8016         key.objectid = bytenr;
8017         key.type = (u8)-1;
8018         key.offset = (u64)-1;
8019
8020         while(1) {
8021                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8022                                         &key, path, 0, 1);
8023                 if (ret < 0)
8024                         break;
8025
8026                 if (ret > 0) {
8027                         ret = 0;
8028                         if (path->slots[0] == 0)
8029                                 break;
8030                         path->slots[0]--;
8031                 }
8032                 ret = 0;
8033
8034                 leaf = path->nodes[0];
8035                 slot = path->slots[0];
8036
8037                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8038                 if (found_key.objectid != bytenr)
8039                         break;
8040
8041                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8042                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8043                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8044                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8045                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8046                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8047                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8048                         btrfs_release_path(path);
8049                         if (found_key.type == 0) {
8050                                 if (found_key.offset == 0)
8051                                         break;
8052                                 key.offset = found_key.offset - 1;
8053                                 key.type = found_key.type;
8054                         }
8055                         key.type = found_key.type - 1;
8056                         key.offset = (u64)-1;
8057                         continue;
8058                 }
8059
8060                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8061                         found_key.objectid, found_key.type, found_key.offset);
8062
8063                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8064                 if (ret)
8065                         break;
8066                 btrfs_release_path(path);
8067
8068                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8069                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8070                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8071                                 found_key.offset : root->nodesize;
8072
8073                         ret = btrfs_update_block_group(trans, root, bytenr,
8074                                                        bytes, 0, 0);
8075                         if (ret)
8076                                 break;
8077                 }
8078         }
8079
8080         btrfs_release_path(path);
8081         return ret;
8082 }
8083
8084 /*
8085  * for a single backref, this will allocate a new extent
8086  * and add the backref to it.
8087  */
8088 static int record_extent(struct btrfs_trans_handle *trans,
8089                          struct btrfs_fs_info *info,
8090                          struct btrfs_path *path,
8091                          struct extent_record *rec,
8092                          struct extent_backref *back,
8093                          int allocated, u64 flags)
8094 {
8095         int ret = 0;
8096         struct btrfs_root *extent_root = info->extent_root;
8097         struct extent_buffer *leaf;
8098         struct btrfs_key ins_key;
8099         struct btrfs_extent_item *ei;
8100         struct data_backref *dback;
8101         struct btrfs_tree_block_info *bi;
8102
8103         if (!back->is_data)
8104                 rec->max_size = max_t(u64, rec->max_size,
8105                                     info->extent_root->nodesize);
8106
8107         if (!allocated) {
8108                 u32 item_size = sizeof(*ei);
8109
8110                 if (!back->is_data)
8111                         item_size += sizeof(*bi);
8112
8113                 ins_key.objectid = rec->start;
8114                 ins_key.offset = rec->max_size;
8115                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8116
8117                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8118                                         &ins_key, item_size);
8119                 if (ret)
8120                         goto fail;
8121
8122                 leaf = path->nodes[0];
8123                 ei = btrfs_item_ptr(leaf, path->slots[0],
8124                                     struct btrfs_extent_item);
8125
8126                 btrfs_set_extent_refs(leaf, ei, 0);
8127                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8128
8129                 if (back->is_data) {
8130                         btrfs_set_extent_flags(leaf, ei,
8131                                                BTRFS_EXTENT_FLAG_DATA);
8132                 } else {
8133                         struct btrfs_disk_key copy_key;;
8134
8135                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8136                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8137                                              sizeof(*bi));
8138
8139                         btrfs_set_disk_key_objectid(&copy_key,
8140                                                     rec->info_objectid);
8141                         btrfs_set_disk_key_type(&copy_key, 0);
8142                         btrfs_set_disk_key_offset(&copy_key, 0);
8143
8144                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8145                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8146
8147                         btrfs_set_extent_flags(leaf, ei,
8148                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8149                 }
8150
8151                 btrfs_mark_buffer_dirty(leaf);
8152                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8153                                                rec->max_size, 1, 0);
8154                 if (ret)
8155                         goto fail;
8156                 btrfs_release_path(path);
8157         }
8158
8159         if (back->is_data) {
8160                 u64 parent;
8161                 int i;
8162
8163                 dback = to_data_backref(back);
8164                 if (back->full_backref)
8165                         parent = dback->parent;
8166                 else
8167                         parent = 0;
8168
8169                 for (i = 0; i < dback->found_ref; i++) {
8170                         /* if parent != 0, we're doing a full backref
8171                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8172                          * just makes the backref allocator create a data
8173                          * backref
8174                          */
8175                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8176                                                    rec->start, rec->max_size,
8177                                                    parent,
8178                                                    dback->root,
8179                                                    parent ?
8180                                                    BTRFS_FIRST_FREE_OBJECTID :
8181                                                    dback->owner,
8182                                                    dback->offset);
8183                         if (ret)
8184                                 break;
8185                 }
8186                 fprintf(stderr, "adding new data backref"
8187                                 " on %llu %s %llu owner %llu"
8188                                 " offset %llu found %d\n",
8189                                 (unsigned long long)rec->start,
8190                                 back->full_backref ?
8191                                 "parent" : "root",
8192                                 back->full_backref ?
8193                                 (unsigned long long)parent :
8194                                 (unsigned long long)dback->root,
8195                                 (unsigned long long)dback->owner,
8196                                 (unsigned long long)dback->offset,
8197                                 dback->found_ref);
8198         } else {
8199                 u64 parent;
8200                 struct tree_backref *tback;
8201
8202                 tback = to_tree_backref(back);
8203                 if (back->full_backref)
8204                         parent = tback->parent;
8205                 else
8206                         parent = 0;
8207
8208                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8209                                            rec->start, rec->max_size,
8210                                            parent, tback->root, 0, 0);
8211                 fprintf(stderr, "adding new tree backref on "
8212                         "start %llu len %llu parent %llu root %llu\n",
8213                         rec->start, rec->max_size, parent, tback->root);
8214         }
8215 fail:
8216         btrfs_release_path(path);
8217         return ret;
8218 }
8219
8220 static struct extent_entry *find_entry(struct list_head *entries,
8221                                        u64 bytenr, u64 bytes)
8222 {
8223         struct extent_entry *entry = NULL;
8224
8225         list_for_each_entry(entry, entries, list) {
8226                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8227                         return entry;
8228         }
8229
8230         return NULL;
8231 }
8232
8233 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8234 {
8235         struct extent_entry *entry, *best = NULL, *prev = NULL;
8236
8237         list_for_each_entry(entry, entries, list) {
8238                 /*
8239                  * If there are as many broken entries as entries then we know
8240                  * not to trust this particular entry.
8241                  */
8242                 if (entry->broken == entry->count)
8243                         continue;
8244
8245                 /*
8246                  * Special case, when there are only two entries and 'best' is
8247                  * the first one
8248                  */
8249                 if (!prev) {
8250                         best = entry;
8251                         prev = entry;
8252                         continue;
8253                 }
8254
8255                 /*
8256                  * If our current entry == best then we can't be sure our best
8257                  * is really the best, so we need to keep searching.
8258                  */
8259                 if (best && best->count == entry->count) {
8260                         prev = entry;
8261                         best = NULL;
8262                         continue;
8263                 }
8264
8265                 /* Prev == entry, not good enough, have to keep searching */
8266                 if (!prev->broken && prev->count == entry->count)
8267                         continue;
8268
8269                 if (!best)
8270                         best = (prev->count > entry->count) ? prev : entry;
8271                 else if (best->count < entry->count)
8272                         best = entry;
8273                 prev = entry;
8274         }
8275
8276         return best;
8277 }
8278
8279 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8280                       struct data_backref *dback, struct extent_entry *entry)
8281 {
8282         struct btrfs_trans_handle *trans;
8283         struct btrfs_root *root;
8284         struct btrfs_file_extent_item *fi;
8285         struct extent_buffer *leaf;
8286         struct btrfs_key key;
8287         u64 bytenr, bytes;
8288         int ret, err;
8289
8290         key.objectid = dback->root;
8291         key.type = BTRFS_ROOT_ITEM_KEY;
8292         key.offset = (u64)-1;
8293         root = btrfs_read_fs_root(info, &key);
8294         if (IS_ERR(root)) {
8295                 fprintf(stderr, "Couldn't find root for our ref\n");
8296                 return -EINVAL;
8297         }
8298
8299         /*
8300          * The backref points to the original offset of the extent if it was
8301          * split, so we need to search down to the offset we have and then walk
8302          * forward until we find the backref we're looking for.
8303          */
8304         key.objectid = dback->owner;
8305         key.type = BTRFS_EXTENT_DATA_KEY;
8306         key.offset = dback->offset;
8307         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8308         if (ret < 0) {
8309                 fprintf(stderr, "Error looking up ref %d\n", ret);
8310                 return ret;
8311         }
8312
8313         while (1) {
8314                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8315                         ret = btrfs_next_leaf(root, path);
8316                         if (ret) {
8317                                 fprintf(stderr, "Couldn't find our ref, next\n");
8318                                 return -EINVAL;
8319                         }
8320                 }
8321                 leaf = path->nodes[0];
8322                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8323                 if (key.objectid != dback->owner ||
8324                     key.type != BTRFS_EXTENT_DATA_KEY) {
8325                         fprintf(stderr, "Couldn't find our ref, search\n");
8326                         return -EINVAL;
8327                 }
8328                 fi = btrfs_item_ptr(leaf, path->slots[0],
8329                                     struct btrfs_file_extent_item);
8330                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8331                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8332
8333                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8334                         break;
8335                 path->slots[0]++;
8336         }
8337
8338         btrfs_release_path(path);
8339
8340         trans = btrfs_start_transaction(root, 1);
8341         if (IS_ERR(trans))
8342                 return PTR_ERR(trans);
8343
8344         /*
8345          * Ok we have the key of the file extent we want to fix, now we can cow
8346          * down to the thing and fix it.
8347          */
8348         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8349         if (ret < 0) {
8350                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8351                         key.objectid, key.type, key.offset, ret);
8352                 goto out;
8353         }
8354         if (ret > 0) {
8355                 fprintf(stderr, "Well that's odd, we just found this key "
8356                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8357                         key.offset);
8358                 ret = -EINVAL;
8359                 goto out;
8360         }
8361         leaf = path->nodes[0];
8362         fi = btrfs_item_ptr(leaf, path->slots[0],
8363                             struct btrfs_file_extent_item);
8364
8365         if (btrfs_file_extent_compression(leaf, fi) &&
8366             dback->disk_bytenr != entry->bytenr) {
8367                 fprintf(stderr, "Ref doesn't match the record start and is "
8368                         "compressed, please take a btrfs-image of this file "
8369                         "system and send it to a btrfs developer so they can "
8370                         "complete this functionality for bytenr %Lu\n",
8371                         dback->disk_bytenr);
8372                 ret = -EINVAL;
8373                 goto out;
8374         }
8375
8376         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8377                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8378         } else if (dback->disk_bytenr > entry->bytenr) {
8379                 u64 off_diff, offset;
8380
8381                 off_diff = dback->disk_bytenr - entry->bytenr;
8382                 offset = btrfs_file_extent_offset(leaf, fi);
8383                 if (dback->disk_bytenr + offset +
8384                     btrfs_file_extent_num_bytes(leaf, fi) >
8385                     entry->bytenr + entry->bytes) {
8386                         fprintf(stderr, "Ref is past the entry end, please "
8387                                 "take a btrfs-image of this file system and "
8388                                 "send it to a btrfs developer, ref %Lu\n",
8389                                 dback->disk_bytenr);
8390                         ret = -EINVAL;
8391                         goto out;
8392                 }
8393                 offset += off_diff;
8394                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8395                 btrfs_set_file_extent_offset(leaf, fi, offset);
8396         } else if (dback->disk_bytenr < entry->bytenr) {
8397                 u64 offset;
8398
8399                 offset = btrfs_file_extent_offset(leaf, fi);
8400                 if (dback->disk_bytenr + offset < entry->bytenr) {
8401                         fprintf(stderr, "Ref is before the entry start, please"
8402                                 " take a btrfs-image of this file system and "
8403                                 "send it to a btrfs developer, ref %Lu\n",
8404                                 dback->disk_bytenr);
8405                         ret = -EINVAL;
8406                         goto out;
8407                 }
8408
8409                 offset += dback->disk_bytenr;
8410                 offset -= entry->bytenr;
8411                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8412                 btrfs_set_file_extent_offset(leaf, fi, offset);
8413         }
8414
8415         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8416
8417         /*
8418          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8419          * only do this if we aren't using compression, otherwise it's a
8420          * trickier case.
8421          */
8422         if (!btrfs_file_extent_compression(leaf, fi))
8423                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8424         else
8425                 printf("ram bytes may be wrong?\n");
8426         btrfs_mark_buffer_dirty(leaf);
8427 out:
8428         err = btrfs_commit_transaction(trans, root);
8429         btrfs_release_path(path);
8430         return ret ? ret : err;
8431 }
8432
8433 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8434                            struct extent_record *rec)
8435 {
8436         struct extent_backref *back;
8437         struct data_backref *dback;
8438         struct extent_entry *entry, *best = NULL;
8439         LIST_HEAD(entries);
8440         int nr_entries = 0;
8441         int broken_entries = 0;
8442         int ret = 0;
8443         short mismatch = 0;
8444
8445         /*
8446          * Metadata is easy and the backrefs should always agree on bytenr and
8447          * size, if not we've got bigger issues.
8448          */
8449         if (rec->metadata)
8450                 return 0;
8451
8452         list_for_each_entry(back, &rec->backrefs, list) {
8453                 if (back->full_backref || !back->is_data)
8454                         continue;
8455
8456                 dback = to_data_backref(back);
8457
8458                 /*
8459                  * We only pay attention to backrefs that we found a real
8460                  * backref for.
8461                  */
8462                 if (dback->found_ref == 0)
8463                         continue;
8464
8465                 /*
8466                  * For now we only catch when the bytes don't match, not the
8467                  * bytenr.  We can easily do this at the same time, but I want
8468                  * to have a fs image to test on before we just add repair
8469                  * functionality willy-nilly so we know we won't screw up the
8470                  * repair.
8471                  */
8472
8473                 entry = find_entry(&entries, dback->disk_bytenr,
8474                                    dback->bytes);
8475                 if (!entry) {
8476                         entry = malloc(sizeof(struct extent_entry));
8477                         if (!entry) {
8478                                 ret = -ENOMEM;
8479                                 goto out;
8480                         }
8481                         memset(entry, 0, sizeof(*entry));
8482                         entry->bytenr = dback->disk_bytenr;
8483                         entry->bytes = dback->bytes;
8484                         list_add_tail(&entry->list, &entries);
8485                         nr_entries++;
8486                 }
8487
8488                 /*
8489                  * If we only have on entry we may think the entries agree when
8490                  * in reality they don't so we have to do some extra checking.
8491                  */
8492                 if (dback->disk_bytenr != rec->start ||
8493                     dback->bytes != rec->nr || back->broken)
8494                         mismatch = 1;
8495
8496                 if (back->broken) {
8497                         entry->broken++;
8498                         broken_entries++;
8499                 }
8500
8501                 entry->count++;
8502         }
8503
8504         /* Yay all the backrefs agree, carry on good sir */
8505         if (nr_entries <= 1 && !mismatch)
8506                 goto out;
8507
8508         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8509                 "%Lu\n", rec->start);
8510
8511         /*
8512          * First we want to see if the backrefs can agree amongst themselves who
8513          * is right, so figure out which one of the entries has the highest
8514          * count.
8515          */
8516         best = find_most_right_entry(&entries);
8517
8518         /*
8519          * Ok so we may have an even split between what the backrefs think, so
8520          * this is where we use the extent ref to see what it thinks.
8521          */
8522         if (!best) {
8523                 entry = find_entry(&entries, rec->start, rec->nr);
8524                 if (!entry && (!broken_entries || !rec->found_rec)) {
8525                         fprintf(stderr, "Backrefs don't agree with each other "
8526                                 "and extent record doesn't agree with anybody,"
8527                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8528                                 rec->start, rec->nr);
8529                         ret = -EINVAL;
8530                         goto out;
8531                 } else if (!entry) {
8532                         /*
8533                          * Ok our backrefs were broken, we'll assume this is the
8534                          * correct value and add an entry for this range.
8535                          */
8536                         entry = malloc(sizeof(struct extent_entry));
8537                         if (!entry) {
8538                                 ret = -ENOMEM;
8539                                 goto out;
8540                         }
8541                         memset(entry, 0, sizeof(*entry));
8542                         entry->bytenr = rec->start;
8543                         entry->bytes = rec->nr;
8544                         list_add_tail(&entry->list, &entries);
8545                         nr_entries++;
8546                 }
8547                 entry->count++;
8548                 best = find_most_right_entry(&entries);
8549                 if (!best) {
8550                         fprintf(stderr, "Backrefs and extent record evenly "
8551                                 "split on who is right, this is going to "
8552                                 "require user input to fix bytenr %Lu bytes "
8553                                 "%Lu\n", rec->start, rec->nr);
8554                         ret = -EINVAL;
8555                         goto out;
8556                 }
8557         }
8558
8559         /*
8560          * I don't think this can happen currently as we'll abort() if we catch
8561          * this case higher up, but in case somebody removes that we still can't
8562          * deal with it properly here yet, so just bail out of that's the case.
8563          */
8564         if (best->bytenr != rec->start) {
8565                 fprintf(stderr, "Extent start and backref starts don't match, "
8566                         "please use btrfs-image on this file system and send "
8567                         "it to a btrfs developer so they can make fsck fix "
8568                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8569                         rec->start, rec->nr);
8570                 ret = -EINVAL;
8571                 goto out;
8572         }
8573
8574         /*
8575          * Ok great we all agreed on an extent record, let's go find the real
8576          * references and fix up the ones that don't match.
8577          */
8578         list_for_each_entry(back, &rec->backrefs, list) {
8579                 if (back->full_backref || !back->is_data)
8580                         continue;
8581
8582                 dback = to_data_backref(back);
8583
8584                 /*
8585                  * Still ignoring backrefs that don't have a real ref attached
8586                  * to them.
8587                  */
8588                 if (dback->found_ref == 0)
8589                         continue;
8590
8591                 if (dback->bytes == best->bytes &&
8592                     dback->disk_bytenr == best->bytenr)
8593                         continue;
8594
8595                 ret = repair_ref(info, path, dback, best);
8596                 if (ret)
8597                         goto out;
8598         }
8599
8600         /*
8601          * Ok we messed with the actual refs, which means we need to drop our
8602          * entire cache and go back and rescan.  I know this is a huge pain and
8603          * adds a lot of extra work, but it's the only way to be safe.  Once all
8604          * the backrefs agree we may not need to do anything to the extent
8605          * record itself.
8606          */
8607         ret = -EAGAIN;
8608 out:
8609         while (!list_empty(&entries)) {
8610                 entry = list_entry(entries.next, struct extent_entry, list);
8611                 list_del_init(&entry->list);
8612                 free(entry);
8613         }
8614         return ret;
8615 }
8616
8617 static int process_duplicates(struct cache_tree *extent_cache,
8618                               struct extent_record *rec)
8619 {
8620         struct extent_record *good, *tmp;
8621         struct cache_extent *cache;
8622         int ret;
8623
8624         /*
8625          * If we found a extent record for this extent then return, or if we
8626          * have more than one duplicate we are likely going to need to delete
8627          * something.
8628          */
8629         if (rec->found_rec || rec->num_duplicates > 1)
8630                 return 0;
8631
8632         /* Shouldn't happen but just in case */
8633         BUG_ON(!rec->num_duplicates);
8634
8635         /*
8636          * So this happens if we end up with a backref that doesn't match the
8637          * actual extent entry.  So either the backref is bad or the extent
8638          * entry is bad.  Either way we want to have the extent_record actually
8639          * reflect what we found in the extent_tree, so we need to take the
8640          * duplicate out and use that as the extent_record since the only way we
8641          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8642          */
8643         remove_cache_extent(extent_cache, &rec->cache);
8644
8645         good = to_extent_record(rec->dups.next);
8646         list_del_init(&good->list);
8647         INIT_LIST_HEAD(&good->backrefs);
8648         INIT_LIST_HEAD(&good->dups);
8649         good->cache.start = good->start;
8650         good->cache.size = good->nr;
8651         good->content_checked = 0;
8652         good->owner_ref_checked = 0;
8653         good->num_duplicates = 0;
8654         good->refs = rec->refs;
8655         list_splice_init(&rec->backrefs, &good->backrefs);
8656         while (1) {
8657                 cache = lookup_cache_extent(extent_cache, good->start,
8658                                             good->nr);
8659                 if (!cache)
8660                         break;
8661                 tmp = container_of(cache, struct extent_record, cache);
8662
8663                 /*
8664                  * If we find another overlapping extent and it's found_rec is
8665                  * set then it's a duplicate and we need to try and delete
8666                  * something.
8667                  */
8668                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8669                         if (list_empty(&good->list))
8670                                 list_add_tail(&good->list,
8671                                               &duplicate_extents);
8672                         good->num_duplicates += tmp->num_duplicates + 1;
8673                         list_splice_init(&tmp->dups, &good->dups);
8674                         list_del_init(&tmp->list);
8675                         list_add_tail(&tmp->list, &good->dups);
8676                         remove_cache_extent(extent_cache, &tmp->cache);
8677                         continue;
8678                 }
8679
8680                 /*
8681                  * Ok we have another non extent item backed extent rec, so lets
8682                  * just add it to this extent and carry on like we did above.
8683                  */
8684                 good->refs += tmp->refs;
8685                 list_splice_init(&tmp->backrefs, &good->backrefs);
8686                 remove_cache_extent(extent_cache, &tmp->cache);
8687                 free(tmp);
8688         }
8689         ret = insert_cache_extent(extent_cache, &good->cache);
8690         BUG_ON(ret);
8691         free(rec);
8692         return good->num_duplicates ? 0 : 1;
8693 }
8694
8695 static int delete_duplicate_records(struct btrfs_root *root,
8696                                     struct extent_record *rec)
8697 {
8698         struct btrfs_trans_handle *trans;
8699         LIST_HEAD(delete_list);
8700         struct btrfs_path path;
8701         struct extent_record *tmp, *good, *n;
8702         int nr_del = 0;
8703         int ret = 0, err;
8704         struct btrfs_key key;
8705
8706         btrfs_init_path(&path);
8707
8708         good = rec;
8709         /* Find the record that covers all of the duplicates. */
8710         list_for_each_entry(tmp, &rec->dups, list) {
8711                 if (good->start < tmp->start)
8712                         continue;
8713                 if (good->nr > tmp->nr)
8714                         continue;
8715
8716                 if (tmp->start + tmp->nr < good->start + good->nr) {
8717                         fprintf(stderr, "Ok we have overlapping extents that "
8718                                 "aren't completely covered by each other, this "
8719                                 "is going to require more careful thought.  "
8720                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8721                                 tmp->start, tmp->nr, good->start, good->nr);
8722                         abort();
8723                 }
8724                 good = tmp;
8725         }
8726
8727         if (good != rec)
8728                 list_add_tail(&rec->list, &delete_list);
8729
8730         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8731                 if (tmp == good)
8732                         continue;
8733                 list_move_tail(&tmp->list, &delete_list);
8734         }
8735
8736         root = root->fs_info->extent_root;
8737         trans = btrfs_start_transaction(root, 1);
8738         if (IS_ERR(trans)) {
8739                 ret = PTR_ERR(trans);
8740                 goto out;
8741         }
8742
8743         list_for_each_entry(tmp, &delete_list, list) {
8744                 if (tmp->found_rec == 0)
8745                         continue;
8746                 key.objectid = tmp->start;
8747                 key.type = BTRFS_EXTENT_ITEM_KEY;
8748                 key.offset = tmp->nr;
8749
8750                 /* Shouldn't happen but just in case */
8751                 if (tmp->metadata) {
8752                         fprintf(stderr, "Well this shouldn't happen, extent "
8753                                 "record overlaps but is metadata? "
8754                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8755                         abort();
8756                 }
8757
8758                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8759                 if (ret) {
8760                         if (ret > 0)
8761                                 ret = -EINVAL;
8762                         break;
8763                 }
8764                 ret = btrfs_del_item(trans, root, &path);
8765                 if (ret)
8766                         break;
8767                 btrfs_release_path(&path);
8768                 nr_del++;
8769         }
8770         err = btrfs_commit_transaction(trans, root);
8771         if (err && !ret)
8772                 ret = err;
8773 out:
8774         while (!list_empty(&delete_list)) {
8775                 tmp = to_extent_record(delete_list.next);
8776                 list_del_init(&tmp->list);
8777                 if (tmp == rec)
8778                         continue;
8779                 free(tmp);
8780         }
8781
8782         while (!list_empty(&rec->dups)) {
8783                 tmp = to_extent_record(rec->dups.next);
8784                 list_del_init(&tmp->list);
8785                 free(tmp);
8786         }
8787
8788         btrfs_release_path(&path);
8789
8790         if (!ret && !nr_del)
8791                 rec->num_duplicates = 0;
8792
8793         return ret ? ret : nr_del;
8794 }
8795
8796 static int find_possible_backrefs(struct btrfs_fs_info *info,
8797                                   struct btrfs_path *path,
8798                                   struct cache_tree *extent_cache,
8799                                   struct extent_record *rec)
8800 {
8801         struct btrfs_root *root;
8802         struct extent_backref *back;
8803         struct data_backref *dback;
8804         struct cache_extent *cache;
8805         struct btrfs_file_extent_item *fi;
8806         struct btrfs_key key;
8807         u64 bytenr, bytes;
8808         int ret;
8809
8810         list_for_each_entry(back, &rec->backrefs, list) {
8811                 /* Don't care about full backrefs (poor unloved backrefs) */
8812                 if (back->full_backref || !back->is_data)
8813                         continue;
8814
8815                 dback = to_data_backref(back);
8816
8817                 /* We found this one, we don't need to do a lookup */
8818                 if (dback->found_ref)
8819                         continue;
8820
8821                 key.objectid = dback->root;
8822                 key.type = BTRFS_ROOT_ITEM_KEY;
8823                 key.offset = (u64)-1;
8824
8825                 root = btrfs_read_fs_root(info, &key);
8826
8827                 /* No root, definitely a bad ref, skip */
8828                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8829                         continue;
8830                 /* Other err, exit */
8831                 if (IS_ERR(root))
8832                         return PTR_ERR(root);
8833
8834                 key.objectid = dback->owner;
8835                 key.type = BTRFS_EXTENT_DATA_KEY;
8836                 key.offset = dback->offset;
8837                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8838                 if (ret) {
8839                         btrfs_release_path(path);
8840                         if (ret < 0)
8841                                 return ret;
8842                         /* Didn't find it, we can carry on */
8843                         ret = 0;
8844                         continue;
8845                 }
8846
8847                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8848                                     struct btrfs_file_extent_item);
8849                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8850                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8851                 btrfs_release_path(path);
8852                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8853                 if (cache) {
8854                         struct extent_record *tmp;
8855                         tmp = container_of(cache, struct extent_record, cache);
8856
8857                         /*
8858                          * If we found an extent record for the bytenr for this
8859                          * particular backref then we can't add it to our
8860                          * current extent record.  We only want to add backrefs
8861                          * that don't have a corresponding extent item in the
8862                          * extent tree since they likely belong to this record
8863                          * and we need to fix it if it doesn't match bytenrs.
8864                          */
8865                         if  (tmp->found_rec)
8866                                 continue;
8867                 }
8868
8869                 dback->found_ref += 1;
8870                 dback->disk_bytenr = bytenr;
8871                 dback->bytes = bytes;
8872
8873                 /*
8874                  * Set this so the verify backref code knows not to trust the
8875                  * values in this backref.
8876                  */
8877                 back->broken = 1;
8878         }
8879
8880         return 0;
8881 }
8882
8883 /*
8884  * Record orphan data ref into corresponding root.
8885  *
8886  * Return 0 if the extent item contains data ref and recorded.
8887  * Return 1 if the extent item contains no useful data ref
8888  *   On that case, it may contains only shared_dataref or metadata backref
8889  *   or the file extent exists(this should be handled by the extent bytenr
8890  *   recovery routine)
8891  * Return <0 if something goes wrong.
8892  */
8893 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8894                                       struct extent_record *rec)
8895 {
8896         struct btrfs_key key;
8897         struct btrfs_root *dest_root;
8898         struct extent_backref *back;
8899         struct data_backref *dback;
8900         struct orphan_data_extent *orphan;
8901         struct btrfs_path path;
8902         int recorded_data_ref = 0;
8903         int ret = 0;
8904
8905         if (rec->metadata)
8906                 return 1;
8907         btrfs_init_path(&path);
8908         list_for_each_entry(back, &rec->backrefs, list) {
8909                 if (back->full_backref || !back->is_data ||
8910                     !back->found_extent_tree)
8911                         continue;
8912                 dback = to_data_backref(back);
8913                 if (dback->found_ref)
8914                         continue;
8915                 key.objectid = dback->root;
8916                 key.type = BTRFS_ROOT_ITEM_KEY;
8917                 key.offset = (u64)-1;
8918
8919                 dest_root = btrfs_read_fs_root(fs_info, &key);
8920
8921                 /* For non-exist root we just skip it */
8922                 if (IS_ERR(dest_root) || !dest_root)
8923                         continue;
8924
8925                 key.objectid = dback->owner;
8926                 key.type = BTRFS_EXTENT_DATA_KEY;
8927                 key.offset = dback->offset;
8928
8929                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8930                 btrfs_release_path(&path);
8931                 /*
8932                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8933                  * we need to record it for inode/file extent rebuild.
8934                  * For ret > 0, we record it only for file extent rebuild.
8935                  * For ret == 0, the file extent exists but only bytenr
8936                  * mismatch, let the original bytenr fix routine to handle,
8937                  * don't record it.
8938                  */
8939                 if (ret == 0)
8940                         continue;
8941                 ret = 0;
8942                 orphan = malloc(sizeof(*orphan));
8943                 if (!orphan) {
8944                         ret = -ENOMEM;
8945                         goto out;
8946                 }
8947                 INIT_LIST_HEAD(&orphan->list);
8948                 orphan->root = dback->root;
8949                 orphan->objectid = dback->owner;
8950                 orphan->offset = dback->offset;
8951                 orphan->disk_bytenr = rec->cache.start;
8952                 orphan->disk_len = rec->cache.size;
8953                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8954                 recorded_data_ref = 1;
8955         }
8956 out:
8957         btrfs_release_path(&path);
8958         if (!ret)
8959                 return !recorded_data_ref;
8960         else
8961                 return ret;
8962 }
8963
8964 /*
8965  * when an incorrect extent item is found, this will delete
8966  * all of the existing entries for it and recreate them
8967  * based on what the tree scan found.
8968  */
8969 static int fixup_extent_refs(struct btrfs_fs_info *info,
8970                              struct cache_tree *extent_cache,
8971                              struct extent_record *rec)
8972 {
8973         struct btrfs_trans_handle *trans = NULL;
8974         int ret;
8975         struct btrfs_path path;
8976         struct list_head *cur = rec->backrefs.next;
8977         struct cache_extent *cache;
8978         struct extent_backref *back;
8979         int allocated = 0;
8980         u64 flags = 0;
8981
8982         if (rec->flag_block_full_backref)
8983                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8984
8985         btrfs_init_path(&path);
8986         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8987                 /*
8988                  * Sometimes the backrefs themselves are so broken they don't
8989                  * get attached to any meaningful rec, so first go back and
8990                  * check any of our backrefs that we couldn't find and throw
8991                  * them into the list if we find the backref so that
8992                  * verify_backrefs can figure out what to do.
8993                  */
8994                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8995                 if (ret < 0)
8996                         goto out;
8997         }
8998
8999         /* step one, make sure all of the backrefs agree */
9000         ret = verify_backrefs(info, &path, rec);
9001         if (ret < 0)
9002                 goto out;
9003
9004         trans = btrfs_start_transaction(info->extent_root, 1);
9005         if (IS_ERR(trans)) {
9006                 ret = PTR_ERR(trans);
9007                 goto out;
9008         }
9009
9010         /* step two, delete all the existing records */
9011         ret = delete_extent_records(trans, info->extent_root, &path,
9012                                     rec->start);
9013
9014         if (ret < 0)
9015                 goto out;
9016
9017         /* was this block corrupt?  If so, don't add references to it */
9018         cache = lookup_cache_extent(info->corrupt_blocks,
9019                                     rec->start, rec->max_size);
9020         if (cache) {
9021                 ret = 0;
9022                 goto out;
9023         }
9024
9025         /* step three, recreate all the refs we did find */
9026         while(cur != &rec->backrefs) {
9027                 back = to_extent_backref(cur);
9028                 cur = cur->next;
9029
9030                 /*
9031                  * if we didn't find any references, don't create a
9032                  * new extent record
9033                  */
9034                 if (!back->found_ref)
9035                         continue;
9036
9037                 rec->bad_full_backref = 0;
9038                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9039                 allocated = 1;
9040
9041                 if (ret)
9042                         goto out;
9043         }
9044 out:
9045         if (trans) {
9046                 int err = btrfs_commit_transaction(trans, info->extent_root);
9047                 if (!ret)
9048                         ret = err;
9049         }
9050
9051         if (!ret)
9052                 fprintf(stderr, "Repaired extent references for %llu\n",
9053                                 (unsigned long long)rec->start);
9054
9055         btrfs_release_path(&path);
9056         return ret;
9057 }
9058
9059 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9060                               struct extent_record *rec)
9061 {
9062         struct btrfs_trans_handle *trans;
9063         struct btrfs_root *root = fs_info->extent_root;
9064         struct btrfs_path path;
9065         struct btrfs_extent_item *ei;
9066         struct btrfs_key key;
9067         u64 flags;
9068         int ret = 0;
9069
9070         key.objectid = rec->start;
9071         if (rec->metadata) {
9072                 key.type = BTRFS_METADATA_ITEM_KEY;
9073                 key.offset = rec->info_level;
9074         } else {
9075                 key.type = BTRFS_EXTENT_ITEM_KEY;
9076                 key.offset = rec->max_size;
9077         }
9078
9079         trans = btrfs_start_transaction(root, 0);
9080         if (IS_ERR(trans))
9081                 return PTR_ERR(trans);
9082
9083         btrfs_init_path(&path);
9084         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9085         if (ret < 0) {
9086                 btrfs_release_path(&path);
9087                 btrfs_commit_transaction(trans, root);
9088                 return ret;
9089         } else if (ret) {
9090                 fprintf(stderr, "Didn't find extent for %llu\n",
9091                         (unsigned long long)rec->start);
9092                 btrfs_release_path(&path);
9093                 btrfs_commit_transaction(trans, root);
9094                 return -ENOENT;
9095         }
9096
9097         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9098                             struct btrfs_extent_item);
9099         flags = btrfs_extent_flags(path.nodes[0], ei);
9100         if (rec->flag_block_full_backref) {
9101                 fprintf(stderr, "setting full backref on %llu\n",
9102                         (unsigned long long)key.objectid);
9103                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9104         } else {
9105                 fprintf(stderr, "clearing full backref on %llu\n",
9106                         (unsigned long long)key.objectid);
9107                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9108         }
9109         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9110         btrfs_mark_buffer_dirty(path.nodes[0]);
9111         btrfs_release_path(&path);
9112         ret = btrfs_commit_transaction(trans, root);
9113         if (!ret)
9114                 fprintf(stderr, "Repaired extent flags for %llu\n",
9115                                 (unsigned long long)rec->start);
9116
9117         return ret;
9118 }
9119
9120 /* right now we only prune from the extent allocation tree */
9121 static int prune_one_block(struct btrfs_trans_handle *trans,
9122                            struct btrfs_fs_info *info,
9123                            struct btrfs_corrupt_block *corrupt)
9124 {
9125         int ret;
9126         struct btrfs_path path;
9127         struct extent_buffer *eb;
9128         u64 found;
9129         int slot;
9130         int nritems;
9131         int level = corrupt->level + 1;
9132
9133         btrfs_init_path(&path);
9134 again:
9135         /* we want to stop at the parent to our busted block */
9136         path.lowest_level = level;
9137
9138         ret = btrfs_search_slot(trans, info->extent_root,
9139                                 &corrupt->key, &path, -1, 1);
9140
9141         if (ret < 0)
9142                 goto out;
9143
9144         eb = path.nodes[level];
9145         if (!eb) {
9146                 ret = -ENOENT;
9147                 goto out;
9148         }
9149
9150         /*
9151          * hopefully the search gave us the block we want to prune,
9152          * lets try that first
9153          */
9154         slot = path.slots[level];
9155         found =  btrfs_node_blockptr(eb, slot);
9156         if (found == corrupt->cache.start)
9157                 goto del_ptr;
9158
9159         nritems = btrfs_header_nritems(eb);
9160
9161         /* the search failed, lets scan this node and hope we find it */
9162         for (slot = 0; slot < nritems; slot++) {
9163                 found =  btrfs_node_blockptr(eb, slot);
9164                 if (found == corrupt->cache.start)
9165                         goto del_ptr;
9166         }
9167         /*
9168          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9169          * to this block
9170          */
9171         if (eb == info->extent_root->node) {
9172                 ret = -ENOENT;
9173                 goto out;
9174         } else {
9175                 level++;
9176                 btrfs_release_path(&path);
9177                 goto again;
9178         }
9179
9180 del_ptr:
9181         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9182         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9183
9184 out:
9185         btrfs_release_path(&path);
9186         return ret;
9187 }
9188
9189 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9190 {
9191         struct btrfs_trans_handle *trans = NULL;
9192         struct cache_extent *cache;
9193         struct btrfs_corrupt_block *corrupt;
9194
9195         while (1) {
9196                 cache = search_cache_extent(info->corrupt_blocks, 0);
9197                 if (!cache)
9198                         break;
9199                 if (!trans) {
9200                         trans = btrfs_start_transaction(info->extent_root, 1);
9201                         if (IS_ERR(trans))
9202                                 return PTR_ERR(trans);
9203                 }
9204                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9205                 prune_one_block(trans, info, corrupt);
9206                 remove_cache_extent(info->corrupt_blocks, cache);
9207         }
9208         if (trans)
9209                 return btrfs_commit_transaction(trans, info->extent_root);
9210         return 0;
9211 }
9212
9213 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9214 {
9215         struct btrfs_block_group_cache *cache;
9216         u64 start, end;
9217         int ret;
9218
9219         while (1) {
9220                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9221                                             &start, &end, EXTENT_DIRTY);
9222                 if (ret)
9223                         break;
9224                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9225         }
9226
9227         start = 0;
9228         while (1) {
9229                 cache = btrfs_lookup_first_block_group(fs_info, start);
9230                 if (!cache)
9231                         break;
9232                 if (cache->cached)
9233                         cache->cached = 0;
9234                 start = cache->key.objectid + cache->key.offset;
9235         }
9236 }
9237
9238 static int check_extent_refs(struct btrfs_root *root,
9239                              struct cache_tree *extent_cache)
9240 {
9241         struct extent_record *rec;
9242         struct cache_extent *cache;
9243         int ret = 0;
9244         int had_dups = 0;
9245
9246         if (repair) {
9247                 /*
9248                  * if we're doing a repair, we have to make sure
9249                  * we don't allocate from the problem extents.
9250                  * In the worst case, this will be all the
9251                  * extents in the FS
9252                  */
9253                 cache = search_cache_extent(extent_cache, 0);
9254                 while(cache) {
9255                         rec = container_of(cache, struct extent_record, cache);
9256                         set_extent_dirty(root->fs_info->excluded_extents,
9257                                          rec->start,
9258                                          rec->start + rec->max_size - 1);
9259                         cache = next_cache_extent(cache);
9260                 }
9261
9262                 /* pin down all the corrupted blocks too */
9263                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9264                 while(cache) {
9265                         set_extent_dirty(root->fs_info->excluded_extents,
9266                                          cache->start,
9267                                          cache->start + cache->size - 1);
9268                         cache = next_cache_extent(cache);
9269                 }
9270                 prune_corrupt_blocks(root->fs_info);
9271                 reset_cached_block_groups(root->fs_info);
9272         }
9273
9274         reset_cached_block_groups(root->fs_info);
9275
9276         /*
9277          * We need to delete any duplicate entries we find first otherwise we
9278          * could mess up the extent tree when we have backrefs that actually
9279          * belong to a different extent item and not the weird duplicate one.
9280          */
9281         while (repair && !list_empty(&duplicate_extents)) {
9282                 rec = to_extent_record(duplicate_extents.next);
9283                 list_del_init(&rec->list);
9284
9285                 /* Sometimes we can find a backref before we find an actual
9286                  * extent, so we need to process it a little bit to see if there
9287                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9288                  * if this is a backref screwup.  If we need to delete stuff
9289                  * process_duplicates() will return 0, otherwise it will return
9290                  * 1 and we
9291                  */
9292                 if (process_duplicates(extent_cache, rec))
9293                         continue;
9294                 ret = delete_duplicate_records(root, rec);
9295                 if (ret < 0)
9296                         return ret;
9297                 /*
9298                  * delete_duplicate_records will return the number of entries
9299                  * deleted, so if it's greater than 0 then we know we actually
9300                  * did something and we need to remove.
9301                  */
9302                 if (ret)
9303                         had_dups = 1;
9304         }
9305
9306         if (had_dups)
9307                 return -EAGAIN;
9308
9309         while(1) {
9310                 int cur_err = 0;
9311                 int fix = 0;
9312
9313                 cache = search_cache_extent(extent_cache, 0);
9314                 if (!cache)
9315                         break;
9316                 rec = container_of(cache, struct extent_record, cache);
9317                 if (rec->num_duplicates) {
9318                         fprintf(stderr, "extent item %llu has multiple extent "
9319                                 "items\n", (unsigned long long)rec->start);
9320                         cur_err = 1;
9321                 }
9322
9323                 if (rec->refs != rec->extent_item_refs) {
9324                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9325                                 (unsigned long long)rec->start,
9326                                 (unsigned long long)rec->nr);
9327                         fprintf(stderr, "extent item %llu, found %llu\n",
9328                                 (unsigned long long)rec->extent_item_refs,
9329                                 (unsigned long long)rec->refs);
9330                         ret = record_orphan_data_extents(root->fs_info, rec);
9331                         if (ret < 0)
9332                                 goto repair_abort;
9333                         fix = ret;
9334                         cur_err = 1;
9335                 }
9336                 if (all_backpointers_checked(rec, 1)) {
9337                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9338                                 (unsigned long long)rec->start,
9339                                 (unsigned long long)rec->nr);
9340                         fix = 1;
9341                         cur_err = 1;
9342                 }
9343                 if (!rec->owner_ref_checked) {
9344                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9345                                 (unsigned long long)rec->start,
9346                                 (unsigned long long)rec->nr);
9347                         fix = 1;
9348                         cur_err = 1;
9349                 }
9350
9351                 if (repair && fix) {
9352                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9353                         if (ret)
9354                                 goto repair_abort;
9355                 }
9356
9357
9358                 if (rec->bad_full_backref) {
9359                         fprintf(stderr, "bad full backref, on [%llu]\n",
9360                                 (unsigned long long)rec->start);
9361                         if (repair) {
9362                                 ret = fixup_extent_flags(root->fs_info, rec);
9363                                 if (ret)
9364                                         goto repair_abort;
9365                                 fix = 1;
9366                         }
9367                         cur_err = 1;
9368                 }
9369                 /*
9370                  * Although it's not a extent ref's problem, we reuse this
9371                  * routine for error reporting.
9372                  * No repair function yet.
9373                  */
9374                 if (rec->crossing_stripes) {
9375                         fprintf(stderr,
9376                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9377                                 rec->start, rec->start + rec->max_size);
9378                         cur_err = 1;
9379                 }
9380
9381                 if (rec->wrong_chunk_type) {
9382                         fprintf(stderr,
9383                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9384                                 rec->start, rec->start + rec->max_size);
9385                         cur_err = 1;
9386                 }
9387
9388                 remove_cache_extent(extent_cache, cache);
9389                 free_all_extent_backrefs(rec);
9390                 if (!init_extent_tree && repair && (!cur_err || fix))
9391                         clear_extent_dirty(root->fs_info->excluded_extents,
9392                                            rec->start,
9393                                            rec->start + rec->max_size - 1);
9394                 free(rec);
9395         }
9396 repair_abort:
9397         if (repair) {
9398                 if (ret && ret != -EAGAIN) {
9399                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9400                         exit(1);
9401                 } else if (!ret) {
9402                         struct btrfs_trans_handle *trans;
9403
9404                         root = root->fs_info->extent_root;
9405                         trans = btrfs_start_transaction(root, 1);
9406                         if (IS_ERR(trans)) {
9407                                 ret = PTR_ERR(trans);
9408                                 goto repair_abort;
9409                         }
9410
9411                         btrfs_fix_block_accounting(trans, root);
9412                         ret = btrfs_commit_transaction(trans, root);
9413                         if (ret)
9414                                 goto repair_abort;
9415                 }
9416                 return ret;
9417         }
9418         return 0;
9419 }
9420
9421 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9422 {
9423         u64 stripe_size;
9424
9425         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9426                 stripe_size = length;
9427                 stripe_size /= num_stripes;
9428         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9429                 stripe_size = length * 2;
9430                 stripe_size /= num_stripes;
9431         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9432                 stripe_size = length;
9433                 stripe_size /= (num_stripes - 1);
9434         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9435                 stripe_size = length;
9436                 stripe_size /= (num_stripes - 2);
9437         } else {
9438                 stripe_size = length;
9439         }
9440         return stripe_size;
9441 }
9442
9443 /*
9444  * Check the chunk with its block group/dev list ref:
9445  * Return 0 if all refs seems valid.
9446  * Return 1 if part of refs seems valid, need later check for rebuild ref
9447  * like missing block group and needs to search extent tree to rebuild them.
9448  * Return -1 if essential refs are missing and unable to rebuild.
9449  */
9450 static int check_chunk_refs(struct chunk_record *chunk_rec,
9451                             struct block_group_tree *block_group_cache,
9452                             struct device_extent_tree *dev_extent_cache,
9453                             int silent)
9454 {
9455         struct cache_extent *block_group_item;
9456         struct block_group_record *block_group_rec;
9457         struct cache_extent *dev_extent_item;
9458         struct device_extent_record *dev_extent_rec;
9459         u64 devid;
9460         u64 offset;
9461         u64 length;
9462         int metadump_v2 = 0;
9463         int i;
9464         int ret = 0;
9465
9466         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9467                                                chunk_rec->offset,
9468                                                chunk_rec->length);
9469         if (block_group_item) {
9470                 block_group_rec = container_of(block_group_item,
9471                                                struct block_group_record,
9472                                                cache);
9473                 if (chunk_rec->length != block_group_rec->offset ||
9474                     chunk_rec->offset != block_group_rec->objectid ||
9475                     (!metadump_v2 &&
9476                      chunk_rec->type_flags != block_group_rec->flags)) {
9477                         if (!silent)
9478                                 fprintf(stderr,
9479                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9480                                         chunk_rec->objectid,
9481                                         chunk_rec->type,
9482                                         chunk_rec->offset,
9483                                         chunk_rec->length,
9484                                         chunk_rec->offset,
9485                                         chunk_rec->type_flags,
9486                                         block_group_rec->objectid,
9487                                         block_group_rec->type,
9488                                         block_group_rec->offset,
9489                                         block_group_rec->offset,
9490                                         block_group_rec->objectid,
9491                                         block_group_rec->flags);
9492                         ret = -1;
9493                 } else {
9494                         list_del_init(&block_group_rec->list);
9495                         chunk_rec->bg_rec = block_group_rec;
9496                 }
9497         } else {
9498                 if (!silent)
9499                         fprintf(stderr,
9500                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9501                                 chunk_rec->objectid,
9502                                 chunk_rec->type,
9503                                 chunk_rec->offset,
9504                                 chunk_rec->length,
9505                                 chunk_rec->offset,
9506                                 chunk_rec->type_flags);
9507                 ret = 1;
9508         }
9509
9510         if (metadump_v2)
9511                 return ret;
9512
9513         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9514                                     chunk_rec->num_stripes);
9515         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9516                 devid = chunk_rec->stripes[i].devid;
9517                 offset = chunk_rec->stripes[i].offset;
9518                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9519                                                        devid, offset, length);
9520                 if (dev_extent_item) {
9521                         dev_extent_rec = container_of(dev_extent_item,
9522                                                 struct device_extent_record,
9523                                                 cache);
9524                         if (dev_extent_rec->objectid != devid ||
9525                             dev_extent_rec->offset != offset ||
9526                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9527                             dev_extent_rec->length != length) {
9528                                 if (!silent)
9529                                         fprintf(stderr,
9530                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9531                                                 chunk_rec->objectid,
9532                                                 chunk_rec->type,
9533                                                 chunk_rec->offset,
9534                                                 chunk_rec->stripes[i].devid,
9535                                                 chunk_rec->stripes[i].offset,
9536                                                 dev_extent_rec->objectid,
9537                                                 dev_extent_rec->offset,
9538                                                 dev_extent_rec->length);
9539                                 ret = -1;
9540                         } else {
9541                                 list_move(&dev_extent_rec->chunk_list,
9542                                           &chunk_rec->dextents);
9543                         }
9544                 } else {
9545                         if (!silent)
9546                                 fprintf(stderr,
9547                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9548                                         chunk_rec->objectid,
9549                                         chunk_rec->type,
9550                                         chunk_rec->offset,
9551                                         chunk_rec->stripes[i].devid,
9552                                         chunk_rec->stripes[i].offset);
9553                         ret = -1;
9554                 }
9555         }
9556         return ret;
9557 }
9558
9559 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9560 int check_chunks(struct cache_tree *chunk_cache,
9561                  struct block_group_tree *block_group_cache,
9562                  struct device_extent_tree *dev_extent_cache,
9563                  struct list_head *good, struct list_head *bad,
9564                  struct list_head *rebuild, int silent)
9565 {
9566         struct cache_extent *chunk_item;
9567         struct chunk_record *chunk_rec;
9568         struct block_group_record *bg_rec;
9569         struct device_extent_record *dext_rec;
9570         int err;
9571         int ret = 0;
9572
9573         chunk_item = first_cache_extent(chunk_cache);
9574         while (chunk_item) {
9575                 chunk_rec = container_of(chunk_item, struct chunk_record,
9576                                          cache);
9577                 err = check_chunk_refs(chunk_rec, block_group_cache,
9578                                        dev_extent_cache, silent);
9579                 if (err < 0)
9580                         ret = err;
9581                 if (err == 0 && good)
9582                         list_add_tail(&chunk_rec->list, good);
9583                 if (err > 0 && rebuild)
9584                         list_add_tail(&chunk_rec->list, rebuild);
9585                 if (err < 0 && bad)
9586                         list_add_tail(&chunk_rec->list, bad);
9587                 chunk_item = next_cache_extent(chunk_item);
9588         }
9589
9590         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9591                 if (!silent)
9592                         fprintf(stderr,
9593                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9594                                 bg_rec->objectid,
9595                                 bg_rec->offset,
9596                                 bg_rec->flags);
9597                 if (!ret)
9598                         ret = 1;
9599         }
9600
9601         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9602                             chunk_list) {
9603                 if (!silent)
9604                         fprintf(stderr,
9605                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9606                                 dext_rec->objectid,
9607                                 dext_rec->offset,
9608                                 dext_rec->length);
9609                 if (!ret)
9610                         ret = 1;
9611         }
9612         return ret;
9613 }
9614
9615
9616 static int check_device_used(struct device_record *dev_rec,
9617                              struct device_extent_tree *dext_cache)
9618 {
9619         struct cache_extent *cache;
9620         struct device_extent_record *dev_extent_rec;
9621         u64 total_byte = 0;
9622
9623         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9624         while (cache) {
9625                 dev_extent_rec = container_of(cache,
9626                                               struct device_extent_record,
9627                                               cache);
9628                 if (dev_extent_rec->objectid != dev_rec->devid)
9629                         break;
9630
9631                 list_del_init(&dev_extent_rec->device_list);
9632                 total_byte += dev_extent_rec->length;
9633                 cache = next_cache_extent(cache);
9634         }
9635
9636         if (total_byte != dev_rec->byte_used) {
9637                 fprintf(stderr,
9638                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9639                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9640                         dev_rec->type, dev_rec->offset);
9641                 return -1;
9642         } else {
9643                 return 0;
9644         }
9645 }
9646
9647 /* check btrfs_dev_item -> btrfs_dev_extent */
9648 static int check_devices(struct rb_root *dev_cache,
9649                          struct device_extent_tree *dev_extent_cache)
9650 {
9651         struct rb_node *dev_node;
9652         struct device_record *dev_rec;
9653         struct device_extent_record *dext_rec;
9654         int err;
9655         int ret = 0;
9656
9657         dev_node = rb_first(dev_cache);
9658         while (dev_node) {
9659                 dev_rec = container_of(dev_node, struct device_record, node);
9660                 err = check_device_used(dev_rec, dev_extent_cache);
9661                 if (err)
9662                         ret = err;
9663
9664                 dev_node = rb_next(dev_node);
9665         }
9666         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9667                             device_list) {
9668                 fprintf(stderr,
9669                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9670                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9671                 if (!ret)
9672                         ret = 1;
9673         }
9674         return ret;
9675 }
9676
9677 static int add_root_item_to_list(struct list_head *head,
9678                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9679                                   u8 level, u8 drop_level,
9680                                   int level_size, struct btrfs_key *drop_key)
9681 {
9682
9683         struct root_item_record *ri_rec;
9684         ri_rec = malloc(sizeof(*ri_rec));
9685         if (!ri_rec)
9686                 return -ENOMEM;
9687         ri_rec->bytenr = bytenr;
9688         ri_rec->objectid = objectid;
9689         ri_rec->level = level;
9690         ri_rec->level_size = level_size;
9691         ri_rec->drop_level = drop_level;
9692         ri_rec->last_snapshot = last_snapshot;
9693         if (drop_key)
9694                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9695         list_add_tail(&ri_rec->list, head);
9696
9697         return 0;
9698 }
9699
9700 static void free_root_item_list(struct list_head *list)
9701 {
9702         struct root_item_record *ri_rec;
9703
9704         while (!list_empty(list)) {
9705                 ri_rec = list_first_entry(list, struct root_item_record,
9706                                           list);
9707                 list_del_init(&ri_rec->list);
9708                 free(ri_rec);
9709         }
9710 }
9711
9712 static int deal_root_from_list(struct list_head *list,
9713                                struct btrfs_root *root,
9714                                struct block_info *bits,
9715                                int bits_nr,
9716                                struct cache_tree *pending,
9717                                struct cache_tree *seen,
9718                                struct cache_tree *reada,
9719                                struct cache_tree *nodes,
9720                                struct cache_tree *extent_cache,
9721                                struct cache_tree *chunk_cache,
9722                                struct rb_root *dev_cache,
9723                                struct block_group_tree *block_group_cache,
9724                                struct device_extent_tree *dev_extent_cache)
9725 {
9726         int ret = 0;
9727         u64 last;
9728
9729         while (!list_empty(list)) {
9730                 struct root_item_record *rec;
9731                 struct extent_buffer *buf;
9732                 rec = list_entry(list->next,
9733                                  struct root_item_record, list);
9734                 last = 0;
9735                 buf = read_tree_block(root->fs_info->tree_root,
9736                                       rec->bytenr, rec->level_size, 0);
9737                 if (!extent_buffer_uptodate(buf)) {
9738                         free_extent_buffer(buf);
9739                         ret = -EIO;
9740                         break;
9741                 }
9742                 ret = add_root_to_pending(buf, extent_cache, pending,
9743                                     seen, nodes, rec->objectid);
9744                 if (ret < 0)
9745                         break;
9746                 /*
9747                  * To rebuild extent tree, we need deal with snapshot
9748                  * one by one, otherwise we deal with node firstly which
9749                  * can maximize readahead.
9750                  */
9751                 while (1) {
9752                         ret = run_next_block(root, bits, bits_nr, &last,
9753                                              pending, seen, reada, nodes,
9754                                              extent_cache, chunk_cache,
9755                                              dev_cache, block_group_cache,
9756                                              dev_extent_cache, rec);
9757                         if (ret != 0)
9758                                 break;
9759                 }
9760                 free_extent_buffer(buf);
9761                 list_del(&rec->list);
9762                 free(rec);
9763                 if (ret < 0)
9764                         break;
9765         }
9766         while (ret >= 0) {
9767                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9768                                      reada, nodes, extent_cache, chunk_cache,
9769                                      dev_cache, block_group_cache,
9770                                      dev_extent_cache, NULL);
9771                 if (ret != 0) {
9772                         if (ret > 0)
9773                                 ret = 0;
9774                         break;
9775                 }
9776         }
9777         return ret;
9778 }
9779
9780 static int check_chunks_and_extents(struct btrfs_root *root)
9781 {
9782         struct rb_root dev_cache;
9783         struct cache_tree chunk_cache;
9784         struct block_group_tree block_group_cache;
9785         struct device_extent_tree dev_extent_cache;
9786         struct cache_tree extent_cache;
9787         struct cache_tree seen;
9788         struct cache_tree pending;
9789         struct cache_tree reada;
9790         struct cache_tree nodes;
9791         struct extent_io_tree excluded_extents;
9792         struct cache_tree corrupt_blocks;
9793         struct btrfs_path path;
9794         struct btrfs_key key;
9795         struct btrfs_key found_key;
9796         int ret, err = 0;
9797         struct block_info *bits;
9798         int bits_nr;
9799         struct extent_buffer *leaf;
9800         int slot;
9801         struct btrfs_root_item ri;
9802         struct list_head dropping_trees;
9803         struct list_head normal_trees;
9804         struct btrfs_root *root1;
9805         u64 objectid;
9806         u32 level_size;
9807         u8 level;
9808
9809         dev_cache = RB_ROOT;
9810         cache_tree_init(&chunk_cache);
9811         block_group_tree_init(&block_group_cache);
9812         device_extent_tree_init(&dev_extent_cache);
9813
9814         cache_tree_init(&extent_cache);
9815         cache_tree_init(&seen);
9816         cache_tree_init(&pending);
9817         cache_tree_init(&nodes);
9818         cache_tree_init(&reada);
9819         cache_tree_init(&corrupt_blocks);
9820         extent_io_tree_init(&excluded_extents);
9821         INIT_LIST_HEAD(&dropping_trees);
9822         INIT_LIST_HEAD(&normal_trees);
9823
9824         if (repair) {
9825                 root->fs_info->excluded_extents = &excluded_extents;
9826                 root->fs_info->fsck_extent_cache = &extent_cache;
9827                 root->fs_info->free_extent_hook = free_extent_hook;
9828                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9829         }
9830
9831         bits_nr = 1024;
9832         bits = malloc(bits_nr * sizeof(struct block_info));
9833         if (!bits) {
9834                 perror("malloc");
9835                 exit(1);
9836         }
9837
9838         if (ctx.progress_enabled) {
9839                 ctx.tp = TASK_EXTENTS;
9840                 task_start(ctx.info);
9841         }
9842
9843 again:
9844         root1 = root->fs_info->tree_root;
9845         level = btrfs_header_level(root1->node);
9846         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9847                                     root1->node->start, 0, level, 0,
9848                                     root1->nodesize, NULL);
9849         if (ret < 0)
9850                 goto out;
9851         root1 = root->fs_info->chunk_root;
9852         level = btrfs_header_level(root1->node);
9853         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9854                                     root1->node->start, 0, level, 0,
9855                                     root1->nodesize, NULL);
9856         if (ret < 0)
9857                 goto out;
9858         btrfs_init_path(&path);
9859         key.offset = 0;
9860         key.objectid = 0;
9861         key.type = BTRFS_ROOT_ITEM_KEY;
9862         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9863                                         &key, &path, 0, 0);
9864         if (ret < 0)
9865                 goto out;
9866         while(1) {
9867                 leaf = path.nodes[0];
9868                 slot = path.slots[0];
9869                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9870                         ret = btrfs_next_leaf(root, &path);
9871                         if (ret != 0)
9872                                 break;
9873                         leaf = path.nodes[0];
9874                         slot = path.slots[0];
9875                 }
9876                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9877                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9878                         unsigned long offset;
9879                         u64 last_snapshot;
9880
9881                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9882                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9883                         last_snapshot = btrfs_root_last_snapshot(&ri);
9884                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9885                                 level = btrfs_root_level(&ri);
9886                                 level_size = root->nodesize;
9887                                 ret = add_root_item_to_list(&normal_trees,
9888                                                 found_key.objectid,
9889                                                 btrfs_root_bytenr(&ri),
9890                                                 last_snapshot, level,
9891                                                 0, level_size, NULL);
9892                                 if (ret < 0)
9893                                         goto out;
9894                         } else {
9895                                 level = btrfs_root_level(&ri);
9896                                 level_size = root->nodesize;
9897                                 objectid = found_key.objectid;
9898                                 btrfs_disk_key_to_cpu(&found_key,
9899                                                       &ri.drop_progress);
9900                                 ret = add_root_item_to_list(&dropping_trees,
9901                                                 objectid,
9902                                                 btrfs_root_bytenr(&ri),
9903                                                 last_snapshot, level,
9904                                                 ri.drop_level,
9905                                                 level_size, &found_key);
9906                                 if (ret < 0)
9907                                         goto out;
9908                         }
9909                 }
9910                 path.slots[0]++;
9911         }
9912         btrfs_release_path(&path);
9913
9914         /*
9915          * check_block can return -EAGAIN if it fixes something, please keep
9916          * this in mind when dealing with return values from these functions, if
9917          * we get -EAGAIN we want to fall through and restart the loop.
9918          */
9919         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9920                                   &seen, &reada, &nodes, &extent_cache,
9921                                   &chunk_cache, &dev_cache, &block_group_cache,
9922                                   &dev_extent_cache);
9923         if (ret < 0) {
9924                 if (ret == -EAGAIN)
9925                         goto loop;
9926                 goto out;
9927         }
9928         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9929                                   &pending, &seen, &reada, &nodes,
9930                                   &extent_cache, &chunk_cache, &dev_cache,
9931                                   &block_group_cache, &dev_extent_cache);
9932         if (ret < 0) {
9933                 if (ret == -EAGAIN)
9934                         goto loop;
9935                 goto out;
9936         }
9937
9938         ret = check_chunks(&chunk_cache, &block_group_cache,
9939                            &dev_extent_cache, NULL, NULL, NULL, 0);
9940         if (ret) {
9941                 if (ret == -EAGAIN)
9942                         goto loop;
9943                 err = ret;
9944         }
9945
9946         ret = check_extent_refs(root, &extent_cache);
9947         if (ret < 0) {
9948                 if (ret == -EAGAIN)
9949                         goto loop;
9950                 goto out;
9951         }
9952
9953         ret = check_devices(&dev_cache, &dev_extent_cache);
9954         if (ret && err)
9955                 ret = err;
9956
9957 out:
9958         task_stop(ctx.info);
9959         if (repair) {
9960                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9961                 extent_io_tree_cleanup(&excluded_extents);
9962                 root->fs_info->fsck_extent_cache = NULL;
9963                 root->fs_info->free_extent_hook = NULL;
9964                 root->fs_info->corrupt_blocks = NULL;
9965                 root->fs_info->excluded_extents = NULL;
9966         }
9967         free(bits);
9968         free_chunk_cache_tree(&chunk_cache);
9969         free_device_cache_tree(&dev_cache);
9970         free_block_group_tree(&block_group_cache);
9971         free_device_extent_tree(&dev_extent_cache);
9972         free_extent_cache_tree(&seen);
9973         free_extent_cache_tree(&pending);
9974         free_extent_cache_tree(&reada);
9975         free_extent_cache_tree(&nodes);
9976         free_root_item_list(&normal_trees);
9977         free_root_item_list(&dropping_trees);
9978         return ret;
9979 loop:
9980         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9981         free_extent_cache_tree(&seen);
9982         free_extent_cache_tree(&pending);
9983         free_extent_cache_tree(&reada);
9984         free_extent_cache_tree(&nodes);
9985         free_chunk_cache_tree(&chunk_cache);
9986         free_block_group_tree(&block_group_cache);
9987         free_device_cache_tree(&dev_cache);
9988         free_device_extent_tree(&dev_extent_cache);
9989         free_extent_record_cache(&extent_cache);
9990         free_root_item_list(&normal_trees);
9991         free_root_item_list(&dropping_trees);
9992         extent_io_tree_cleanup(&excluded_extents);
9993         goto again;
9994 }
9995
9996 /*
9997  * Check backrefs of a tree block given by @bytenr or @eb.
9998  *
9999  * @root:       the root containing the @bytenr or @eb
10000  * @eb:         tree block extent buffer, can be NULL
10001  * @bytenr:     bytenr of the tree block to search
10002  * @level:      tree level of the tree block
10003  * @owner:      owner of the tree block
10004  *
10005  * Return >0 for any error found and output error message
10006  * Return 0 for no error found
10007  */
10008 static int check_tree_block_ref(struct btrfs_root *root,
10009                                 struct extent_buffer *eb, u64 bytenr,
10010                                 int level, u64 owner)
10011 {
10012         struct btrfs_key key;
10013         struct btrfs_root *extent_root = root->fs_info->extent_root;
10014         struct btrfs_path path;
10015         struct btrfs_extent_item *ei;
10016         struct btrfs_extent_inline_ref *iref;
10017         struct extent_buffer *leaf;
10018         unsigned long end;
10019         unsigned long ptr;
10020         int slot;
10021         int skinny_level;
10022         int type;
10023         u32 nodesize = root->nodesize;
10024         u32 item_size;
10025         u64 offset;
10026         int tree_reloc_root = 0;
10027         int found_ref = 0;
10028         int err = 0;
10029         int ret;
10030
10031         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10032             btrfs_header_bytenr(root->node) == bytenr)
10033                 tree_reloc_root = 1;
10034
10035         btrfs_init_path(&path);
10036         key.objectid = bytenr;
10037         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10038                 key.type = BTRFS_METADATA_ITEM_KEY;
10039         else
10040                 key.type = BTRFS_EXTENT_ITEM_KEY;
10041         key.offset = (u64)-1;
10042
10043         /* Search for the backref in extent tree */
10044         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10045         if (ret < 0) {
10046                 err |= BACKREF_MISSING;
10047                 goto out;
10048         }
10049         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10050         if (ret) {
10051                 err |= BACKREF_MISSING;
10052                 goto out;
10053         }
10054
10055         leaf = path.nodes[0];
10056         slot = path.slots[0];
10057         btrfs_item_key_to_cpu(leaf, &key, slot);
10058
10059         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10060
10061         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10062                 skinny_level = (int)key.offset;
10063                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10064         } else {
10065                 struct btrfs_tree_block_info *info;
10066
10067                 info = (struct btrfs_tree_block_info *)(ei + 1);
10068                 skinny_level = btrfs_tree_block_level(leaf, info);
10069                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10070         }
10071
10072         if (eb) {
10073                 u64 header_gen;
10074                 u64 extent_gen;
10075
10076                 if (!(btrfs_extent_flags(leaf, ei) &
10077                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10078                         error(
10079                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10080                                 key.objectid, nodesize,
10081                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10082                         err = BACKREF_MISMATCH;
10083                 }
10084                 header_gen = btrfs_header_generation(eb);
10085                 extent_gen = btrfs_extent_generation(leaf, ei);
10086                 if (header_gen != extent_gen) {
10087                         error(
10088         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10089                                 key.objectid, nodesize, header_gen,
10090                                 extent_gen);
10091                         err = BACKREF_MISMATCH;
10092                 }
10093                 if (level != skinny_level) {
10094                         error(
10095                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10096                                 key.objectid, nodesize, level, skinny_level);
10097                         err = BACKREF_MISMATCH;
10098                 }
10099                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10100                         error(
10101                         "extent[%llu %u] is referred by other roots than %llu",
10102                                 key.objectid, nodesize, root->objectid);
10103                         err = BACKREF_MISMATCH;
10104                 }
10105         }
10106
10107         /*
10108          * Iterate the extent/metadata item to find the exact backref
10109          */
10110         item_size = btrfs_item_size_nr(leaf, slot);
10111         ptr = (unsigned long)iref;
10112         end = (unsigned long)ei + item_size;
10113         while (ptr < end) {
10114                 iref = (struct btrfs_extent_inline_ref *)ptr;
10115                 type = btrfs_extent_inline_ref_type(leaf, iref);
10116                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10117
10118                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10119                         (offset == root->objectid || offset == owner)) {
10120                         found_ref = 1;
10121                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10122                         /*
10123                          * Backref of tree reloc root points to itself, no need
10124                          * to check backref any more.
10125                          */
10126                         if (tree_reloc_root)
10127                                 found_ref = 1;
10128                         else
10129                         /* Check if the backref points to valid referencer */
10130                                 found_ref = !check_tree_block_ref(root, NULL,
10131                                                 offset, level + 1, owner);
10132                 }
10133
10134                 if (found_ref)
10135                         break;
10136                 ptr += btrfs_extent_inline_ref_size(type);
10137         }
10138
10139         /*
10140          * Inlined extent item doesn't have what we need, check
10141          * TREE_BLOCK_REF_KEY
10142          */
10143         if (!found_ref) {
10144                 btrfs_release_path(&path);
10145                 key.objectid = bytenr;
10146                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10147                 key.offset = root->objectid;
10148
10149                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10150                 if (!ret)
10151                         found_ref = 1;
10152         }
10153         if (!found_ref)
10154                 err |= BACKREF_MISSING;
10155 out:
10156         btrfs_release_path(&path);
10157         if (eb && (err & BACKREF_MISSING))
10158                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10159                         bytenr, nodesize, owner, level);
10160         return err;
10161 }
10162
10163 /*
10164  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10165  *
10166  * Return >0 any error found and output error message
10167  * Return 0 for no error found
10168  */
10169 static int check_extent_data_item(struct btrfs_root *root,
10170                                   struct extent_buffer *eb, int slot)
10171 {
10172         struct btrfs_file_extent_item *fi;
10173         struct btrfs_path path;
10174         struct btrfs_root *extent_root = root->fs_info->extent_root;
10175         struct btrfs_key fi_key;
10176         struct btrfs_key dbref_key;
10177         struct extent_buffer *leaf;
10178         struct btrfs_extent_item *ei;
10179         struct btrfs_extent_inline_ref *iref;
10180         struct btrfs_extent_data_ref *dref;
10181         u64 owner;
10182         u64 disk_bytenr;
10183         u64 disk_num_bytes;
10184         u64 extent_num_bytes;
10185         u64 extent_flags;
10186         u32 item_size;
10187         unsigned long end;
10188         unsigned long ptr;
10189         int type;
10190         u64 ref_root;
10191         int found_dbackref = 0;
10192         int err = 0;
10193         int ret;
10194
10195         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10196         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10197
10198         /* Nothing to check for hole and inline data extents */
10199         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10200             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10201                 return 0;
10202
10203         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10204         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10205         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10206
10207         /* Check unaligned disk_num_bytes and num_bytes */
10208         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10209                 error(
10210 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10211                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10212                         root->sectorsize);
10213                 err |= BYTES_UNALIGNED;
10214         } else {
10215                 data_bytes_allocated += disk_num_bytes;
10216         }
10217         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10218                 error(
10219 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10220                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10221                         root->sectorsize);
10222                 err |= BYTES_UNALIGNED;
10223         } else {
10224                 data_bytes_referenced += extent_num_bytes;
10225         }
10226         owner = btrfs_header_owner(eb);
10227
10228         /* Check the extent item of the file extent in extent tree */
10229         btrfs_init_path(&path);
10230         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10231         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10232         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10233
10234         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10235         if (ret)
10236                 goto out;
10237
10238         leaf = path.nodes[0];
10239         slot = path.slots[0];
10240         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10241
10242         extent_flags = btrfs_extent_flags(leaf, ei);
10243
10244         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10245                 error(
10246                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10247                     disk_bytenr, disk_num_bytes,
10248                     BTRFS_EXTENT_FLAG_DATA);
10249                 err |= BACKREF_MISMATCH;
10250         }
10251
10252         /* Check data backref inside that extent item */
10253         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10254         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10255         ptr = (unsigned long)iref;
10256         end = (unsigned long)ei + item_size;
10257         while (ptr < end) {
10258                 iref = (struct btrfs_extent_inline_ref *)ptr;
10259                 type = btrfs_extent_inline_ref_type(leaf, iref);
10260                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10261
10262                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10263                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10264                         if (ref_root == owner || ref_root == root->objectid)
10265                                 found_dbackref = 1;
10266                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10267                         found_dbackref = !check_tree_block_ref(root, NULL,
10268                                 btrfs_extent_inline_ref_offset(leaf, iref),
10269                                 0, owner);
10270                 }
10271
10272                 if (found_dbackref)
10273                         break;
10274                 ptr += btrfs_extent_inline_ref_size(type);
10275         }
10276
10277         if (!found_dbackref) {
10278                 btrfs_release_path(&path);
10279
10280                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10281                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10282                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10283                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10284                                 fi_key.objectid, fi_key.offset);
10285
10286                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10287                                         &dbref_key, &path, 0, 0);
10288                 if (!ret) {
10289                         found_dbackref = 1;
10290                         goto out;
10291                 }
10292
10293                 btrfs_release_path(&path);
10294
10295                 /*
10296                  * Neither inlined nor EXTENT_DATA_REF found, try
10297                  * SHARED_DATA_REF as last chance.
10298                  */
10299                 dbref_key.objectid = disk_bytenr;
10300                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10301                 dbref_key.offset = eb->start;
10302
10303                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10304                                         &dbref_key, &path, 0, 0);
10305                 if (!ret) {
10306                         found_dbackref = 1;
10307                         goto out;
10308                 }
10309         }
10310
10311 out:
10312         if (!found_dbackref)
10313                 err |= BACKREF_MISSING;
10314         btrfs_release_path(&path);
10315         if (err & BACKREF_MISSING) {
10316                 error("data extent[%llu %llu] backref lost",
10317                       disk_bytenr, disk_num_bytes);
10318         }
10319         return err;
10320 }
10321
10322 /*
10323  * Get real tree block level for the case like shared block
10324  * Return >= 0 as tree level
10325  * Return <0 for error
10326  */
10327 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10328 {
10329         struct extent_buffer *eb;
10330         struct btrfs_path path;
10331         struct btrfs_key key;
10332         struct btrfs_extent_item *ei;
10333         u64 flags;
10334         u64 transid;
10335         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10336         u8 backref_level;
10337         u8 header_level;
10338         int ret;
10339
10340         /* Search extent tree for extent generation and level */
10341         key.objectid = bytenr;
10342         key.type = BTRFS_METADATA_ITEM_KEY;
10343         key.offset = (u64)-1;
10344
10345         btrfs_init_path(&path);
10346         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10347         if (ret < 0)
10348                 goto release_out;
10349         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10350         if (ret < 0)
10351                 goto release_out;
10352         if (ret > 0) {
10353                 ret = -ENOENT;
10354                 goto release_out;
10355         }
10356
10357         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10358         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10359                             struct btrfs_extent_item);
10360         flags = btrfs_extent_flags(path.nodes[0], ei);
10361         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10362                 ret = -ENOENT;
10363                 goto release_out;
10364         }
10365
10366         /* Get transid for later read_tree_block() check */
10367         transid = btrfs_extent_generation(path.nodes[0], ei);
10368
10369         /* Get backref level as one source */
10370         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10371                 backref_level = key.offset;
10372         } else {
10373                 struct btrfs_tree_block_info *info;
10374
10375                 info = (struct btrfs_tree_block_info *)(ei + 1);
10376                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10377         }
10378         btrfs_release_path(&path);
10379
10380         /* Get level from tree block as an alternative source */
10381         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10382         if (!extent_buffer_uptodate(eb)) {
10383                 free_extent_buffer(eb);
10384                 return -EIO;
10385         }
10386         header_level = btrfs_header_level(eb);
10387         free_extent_buffer(eb);
10388
10389         if (header_level != backref_level)
10390                 return -EIO;
10391         return header_level;
10392
10393 release_out:
10394         btrfs_release_path(&path);
10395         return ret;
10396 }
10397
10398 /*
10399  * Check if a tree block backref is valid (points to a valid tree block)
10400  * if level == -1, level will be resolved
10401  * Return >0 for any error found and print error message
10402  */
10403 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10404                                     u64 bytenr, int level)
10405 {
10406         struct btrfs_root *root;
10407         struct btrfs_key key;
10408         struct btrfs_path path;
10409         struct extent_buffer *eb;
10410         struct extent_buffer *node;
10411         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10412         int err = 0;
10413         int ret;
10414
10415         /* Query level for level == -1 special case */
10416         if (level == -1)
10417                 level = query_tree_block_level(fs_info, bytenr);
10418         if (level < 0) {
10419                 err |= REFERENCER_MISSING;
10420                 goto out;
10421         }
10422
10423         key.objectid = root_id;
10424         key.type = BTRFS_ROOT_ITEM_KEY;
10425         key.offset = (u64)-1;
10426
10427         root = btrfs_read_fs_root(fs_info, &key);
10428         if (IS_ERR(root)) {
10429                 err |= REFERENCER_MISSING;
10430                 goto out;
10431         }
10432
10433         /* Read out the tree block to get item/node key */
10434         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10435         if (!extent_buffer_uptodate(eb)) {
10436                 err |= REFERENCER_MISSING;
10437                 free_extent_buffer(eb);
10438                 goto out;
10439         }
10440
10441         /* Empty tree, no need to check key */
10442         if (!btrfs_header_nritems(eb) && !level) {
10443                 free_extent_buffer(eb);
10444                 goto out;
10445         }
10446
10447         if (level)
10448                 btrfs_node_key_to_cpu(eb, &key, 0);
10449         else
10450                 btrfs_item_key_to_cpu(eb, &key, 0);
10451
10452         free_extent_buffer(eb);
10453
10454         btrfs_init_path(&path);
10455         path.lowest_level = level;
10456         /* Search with the first key, to ensure we can reach it */
10457         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10458         if (ret < 0) {
10459                 err |= REFERENCER_MISSING;
10460                 goto release_out;
10461         }
10462
10463         node = path.nodes[level];
10464         if (btrfs_header_bytenr(node) != bytenr) {
10465                 error(
10466         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10467                         bytenr, nodesize, bytenr,
10468                         btrfs_header_bytenr(node));
10469                 err |= REFERENCER_MISMATCH;
10470         }
10471         if (btrfs_header_level(node) != level) {
10472                 error(
10473         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10474                         bytenr, nodesize, level,
10475                         btrfs_header_level(node));
10476                 err |= REFERENCER_MISMATCH;
10477         }
10478
10479 release_out:
10480         btrfs_release_path(&path);
10481 out:
10482         if (err & REFERENCER_MISSING) {
10483                 if (level < 0)
10484                         error("extent [%llu %d] lost referencer (owner: %llu)",
10485                                 bytenr, nodesize, root_id);
10486                 else
10487                         error(
10488                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10489                                 bytenr, nodesize, root_id, level);
10490         }
10491
10492         return err;
10493 }
10494
10495 /*
10496  * Check if tree block @eb is tree reloc root.
10497  * Return 0 if it's not or any problem happens
10498  * Return 1 if it's a tree reloc root
10499  */
10500 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10501                                  struct extent_buffer *eb)
10502 {
10503         struct btrfs_root *tree_reloc_root;
10504         struct btrfs_key key;
10505         u64 bytenr = btrfs_header_bytenr(eb);
10506         u64 owner = btrfs_header_owner(eb);
10507         int ret = 0;
10508
10509         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10510         key.offset = owner;
10511         key.type = BTRFS_ROOT_ITEM_KEY;
10512
10513         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10514         if (IS_ERR(tree_reloc_root))
10515                 return 0;
10516
10517         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10518                 ret = 1;
10519         btrfs_free_fs_root(tree_reloc_root);
10520         return ret;
10521 }
10522
10523 /*
10524  * Check referencer for shared block backref
10525  * If level == -1, this function will resolve the level.
10526  */
10527 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10528                                      u64 parent, u64 bytenr, int level)
10529 {
10530         struct extent_buffer *eb;
10531         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10532         u32 nr;
10533         int found_parent = 0;
10534         int i;
10535
10536         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10537         if (!extent_buffer_uptodate(eb))
10538                 goto out;
10539
10540         if (level == -1)
10541                 level = query_tree_block_level(fs_info, bytenr);
10542         if (level < 0)
10543                 goto out;
10544
10545         /* It's possible it's a tree reloc root */
10546         if (parent == bytenr) {
10547                 if (is_tree_reloc_root(fs_info, eb))
10548                         found_parent = 1;
10549                 goto out;
10550         }
10551
10552         if (level + 1 != btrfs_header_level(eb))
10553                 goto out;
10554
10555         nr = btrfs_header_nritems(eb);
10556         for (i = 0; i < nr; i++) {
10557                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10558                         found_parent = 1;
10559                         break;
10560                 }
10561         }
10562 out:
10563         free_extent_buffer(eb);
10564         if (!found_parent) {
10565                 error(
10566         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10567                         bytenr, nodesize, parent, level);
10568                 return REFERENCER_MISSING;
10569         }
10570         return 0;
10571 }
10572
10573 /*
10574  * Check referencer for normal (inlined) data ref
10575  * If len == 0, it will be resolved by searching in extent tree
10576  */
10577 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10578                                      u64 root_id, u64 objectid, u64 offset,
10579                                      u64 bytenr, u64 len, u32 count)
10580 {
10581         struct btrfs_root *root;
10582         struct btrfs_root *extent_root = fs_info->extent_root;
10583         struct btrfs_key key;
10584         struct btrfs_path path;
10585         struct extent_buffer *leaf;
10586         struct btrfs_file_extent_item *fi;
10587         u32 found_count = 0;
10588         int slot;
10589         int ret = 0;
10590
10591         if (!len) {
10592                 key.objectid = bytenr;
10593                 key.type = BTRFS_EXTENT_ITEM_KEY;
10594                 key.offset = (u64)-1;
10595
10596                 btrfs_init_path(&path);
10597                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10598                 if (ret < 0)
10599                         goto out;
10600                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10601                 if (ret)
10602                         goto out;
10603                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10604                 if (key.objectid != bytenr ||
10605                     key.type != BTRFS_EXTENT_ITEM_KEY)
10606                         goto out;
10607                 len = key.offset;
10608                 btrfs_release_path(&path);
10609         }
10610         key.objectid = root_id;
10611         key.type = BTRFS_ROOT_ITEM_KEY;
10612         key.offset = (u64)-1;
10613         btrfs_init_path(&path);
10614
10615         root = btrfs_read_fs_root(fs_info, &key);
10616         if (IS_ERR(root))
10617                 goto out;
10618
10619         key.objectid = objectid;
10620         key.type = BTRFS_EXTENT_DATA_KEY;
10621         /*
10622          * It can be nasty as data backref offset is
10623          * file offset - file extent offset, which is smaller or
10624          * equal to original backref offset.  The only special case is
10625          * overflow.  So we need to special check and do further search.
10626          */
10627         key.offset = offset & (1ULL << 63) ? 0 : offset;
10628
10629         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10630         if (ret < 0)
10631                 goto out;
10632
10633         /*
10634          * Search afterwards to get correct one
10635          * NOTE: As we must do a comprehensive check on the data backref to
10636          * make sure the dref count also matches, we must iterate all file
10637          * extents for that inode.
10638          */
10639         while (1) {
10640                 leaf = path.nodes[0];
10641                 slot = path.slots[0];
10642
10643                 if (slot >= btrfs_header_nritems(leaf))
10644                         goto next;
10645                 btrfs_item_key_to_cpu(leaf, &key, slot);
10646                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10647                         break;
10648                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10649                 /*
10650                  * Except normal disk bytenr and disk num bytes, we still
10651                  * need to do extra check on dbackref offset as
10652                  * dbackref offset = file_offset - file_extent_offset
10653                  */
10654                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10655                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10656                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10657                     offset)
10658                         found_count++;
10659
10660 next:
10661                 ret = btrfs_next_item(root, &path);
10662                 if (ret)
10663                         break;
10664         }
10665 out:
10666         btrfs_release_path(&path);
10667         if (found_count != count) {
10668                 error(
10669 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10670                         bytenr, len, root_id, objectid, offset, count, found_count);
10671                 return REFERENCER_MISSING;
10672         }
10673         return 0;
10674 }
10675
10676 /*
10677  * Check if the referencer of a shared data backref exists
10678  */
10679 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10680                                      u64 parent, u64 bytenr)
10681 {
10682         struct extent_buffer *eb;
10683         struct btrfs_key key;
10684         struct btrfs_file_extent_item *fi;
10685         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10686         u32 nr;
10687         int found_parent = 0;
10688         int i;
10689
10690         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10691         if (!extent_buffer_uptodate(eb))
10692                 goto out;
10693
10694         nr = btrfs_header_nritems(eb);
10695         for (i = 0; i < nr; i++) {
10696                 btrfs_item_key_to_cpu(eb, &key, i);
10697                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10698                         continue;
10699
10700                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10701                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10702                         continue;
10703
10704                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10705                         found_parent = 1;
10706                         break;
10707                 }
10708         }
10709
10710 out:
10711         free_extent_buffer(eb);
10712         if (!found_parent) {
10713                 error("shared extent %llu referencer lost (parent: %llu)",
10714                         bytenr, parent);
10715                 return REFERENCER_MISSING;
10716         }
10717         return 0;
10718 }
10719
10720 /*
10721  * This function will check a given extent item, including its backref and
10722  * itself (like crossing stripe boundary and type)
10723  *
10724  * Since we don't use extent_record anymore, introduce new error bit
10725  */
10726 static int check_extent_item(struct btrfs_fs_info *fs_info,
10727                              struct extent_buffer *eb, int slot)
10728 {
10729         struct btrfs_extent_item *ei;
10730         struct btrfs_extent_inline_ref *iref;
10731         struct btrfs_extent_data_ref *dref;
10732         unsigned long end;
10733         unsigned long ptr;
10734         int type;
10735         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10736         u32 item_size = btrfs_item_size_nr(eb, slot);
10737         u64 flags;
10738         u64 offset;
10739         int metadata = 0;
10740         int level;
10741         struct btrfs_key key;
10742         int ret;
10743         int err = 0;
10744
10745         btrfs_item_key_to_cpu(eb, &key, slot);
10746         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10747                 bytes_used += key.offset;
10748         else
10749                 bytes_used += nodesize;
10750
10751         if (item_size < sizeof(*ei)) {
10752                 /*
10753                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10754                  * old thing when on disk format is still un-determined.
10755                  * No need to care about it anymore
10756                  */
10757                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10758                 return -ENOTTY;
10759         }
10760
10761         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10762         flags = btrfs_extent_flags(eb, ei);
10763
10764         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10765                 metadata = 1;
10766         if (metadata && check_crossing_stripes(global_info, key.objectid,
10767                                                eb->len)) {
10768                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10769                       key.objectid, key.objectid + nodesize);
10770                 err |= CROSSING_STRIPE_BOUNDARY;
10771         }
10772
10773         ptr = (unsigned long)(ei + 1);
10774
10775         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10776                 /* Old EXTENT_ITEM metadata */
10777                 struct btrfs_tree_block_info *info;
10778
10779                 info = (struct btrfs_tree_block_info *)ptr;
10780                 level = btrfs_tree_block_level(eb, info);
10781                 ptr += sizeof(struct btrfs_tree_block_info);
10782         } else {
10783                 /* New METADATA_ITEM */
10784                 level = key.offset;
10785         }
10786         end = (unsigned long)ei + item_size;
10787
10788 next:
10789         /* Reached extent item end normally */
10790         if (ptr == end)
10791                 goto out;
10792
10793         /* Beyond extent item end, wrong item size */
10794         if (ptr > end) {
10795                 err |= ITEM_SIZE_MISMATCH;
10796                 error("extent item at bytenr %llu slot %d has wrong size",
10797                         eb->start, slot);
10798                 goto out;
10799         }
10800
10801         /* Now check every backref in this extent item */
10802         iref = (struct btrfs_extent_inline_ref *)ptr;
10803         type = btrfs_extent_inline_ref_type(eb, iref);
10804         offset = btrfs_extent_inline_ref_offset(eb, iref);
10805         switch (type) {
10806         case BTRFS_TREE_BLOCK_REF_KEY:
10807                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10808                                                level);
10809                 err |= ret;
10810                 break;
10811         case BTRFS_SHARED_BLOCK_REF_KEY:
10812                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10813                                                  level);
10814                 err |= ret;
10815                 break;
10816         case BTRFS_EXTENT_DATA_REF_KEY:
10817                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10818                 ret = check_extent_data_backref(fs_info,
10819                                 btrfs_extent_data_ref_root(eb, dref),
10820                                 btrfs_extent_data_ref_objectid(eb, dref),
10821                                 btrfs_extent_data_ref_offset(eb, dref),
10822                                 key.objectid, key.offset,
10823                                 btrfs_extent_data_ref_count(eb, dref));
10824                 err |= ret;
10825                 break;
10826         case BTRFS_SHARED_DATA_REF_KEY:
10827                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10828                 err |= ret;
10829                 break;
10830         default:
10831                 error("extent[%llu %d %llu] has unknown ref type: %d",
10832                         key.objectid, key.type, key.offset, type);
10833                 err |= UNKNOWN_TYPE;
10834                 goto out;
10835         }
10836
10837         ptr += btrfs_extent_inline_ref_size(type);
10838         goto next;
10839
10840 out:
10841         return err;
10842 }
10843
10844 /*
10845  * Check if a dev extent item is referred correctly by its chunk
10846  */
10847 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10848                                  struct extent_buffer *eb, int slot)
10849 {
10850         struct btrfs_root *chunk_root = fs_info->chunk_root;
10851         struct btrfs_dev_extent *ptr;
10852         struct btrfs_path path;
10853         struct btrfs_key chunk_key;
10854         struct btrfs_key devext_key;
10855         struct btrfs_chunk *chunk;
10856         struct extent_buffer *l;
10857         int num_stripes;
10858         u64 length;
10859         int i;
10860         int found_chunk = 0;
10861         int ret;
10862
10863         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10864         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10865         length = btrfs_dev_extent_length(eb, ptr);
10866
10867         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10868         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10869         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10870
10871         btrfs_init_path(&path);
10872         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10873         if (ret)
10874                 goto out;
10875
10876         l = path.nodes[0];
10877         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10878         if (btrfs_chunk_length(l, chunk) != length)
10879                 goto out;
10880
10881         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10882         for (i = 0; i < num_stripes; i++) {
10883                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10884                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10885
10886                 if (devid == devext_key.objectid &&
10887                     offset == devext_key.offset) {
10888                         found_chunk = 1;
10889                         break;
10890                 }
10891         }
10892 out:
10893         btrfs_release_path(&path);
10894         if (!found_chunk) {
10895                 error(
10896                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10897                         devext_key.objectid, devext_key.offset, length);
10898                 return REFERENCER_MISSING;
10899         }
10900         return 0;
10901 }
10902
10903 /*
10904  * Check if the used space is correct with the dev item
10905  */
10906 static int check_dev_item(struct btrfs_fs_info *fs_info,
10907                           struct extent_buffer *eb, int slot)
10908 {
10909         struct btrfs_root *dev_root = fs_info->dev_root;
10910         struct btrfs_dev_item *dev_item;
10911         struct btrfs_path path;
10912         struct btrfs_key key;
10913         struct btrfs_dev_extent *ptr;
10914         u64 dev_id;
10915         u64 used;
10916         u64 total = 0;
10917         int ret;
10918
10919         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10920         dev_id = btrfs_device_id(eb, dev_item);
10921         used = btrfs_device_bytes_used(eb, dev_item);
10922
10923         key.objectid = dev_id;
10924         key.type = BTRFS_DEV_EXTENT_KEY;
10925         key.offset = 0;
10926
10927         btrfs_init_path(&path);
10928         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10929         if (ret < 0) {
10930                 btrfs_item_key_to_cpu(eb, &key, slot);
10931                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10932                         key.objectid, key.type, key.offset);
10933                 btrfs_release_path(&path);
10934                 return REFERENCER_MISSING;
10935         }
10936
10937         /* Iterate dev_extents to calculate the used space of a device */
10938         while (1) {
10939                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10940                         goto next;
10941
10942                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10943                 if (key.objectid > dev_id)
10944                         break;
10945                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10946                         goto next;
10947
10948                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10949                                      struct btrfs_dev_extent);
10950                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10951 next:
10952                 ret = btrfs_next_item(dev_root, &path);
10953                 if (ret)
10954                         break;
10955         }
10956         btrfs_release_path(&path);
10957
10958         if (used != total) {
10959                 btrfs_item_key_to_cpu(eb, &key, slot);
10960                 error(
10961 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10962                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10963                         BTRFS_DEV_EXTENT_KEY, dev_id);
10964                 return ACCOUNTING_MISMATCH;
10965         }
10966         return 0;
10967 }
10968
10969 /*
10970  * Check a block group item with its referener (chunk) and its used space
10971  * with extent/metadata item
10972  */
10973 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10974                                   struct extent_buffer *eb, int slot)
10975 {
10976         struct btrfs_root *extent_root = fs_info->extent_root;
10977         struct btrfs_root *chunk_root = fs_info->chunk_root;
10978         struct btrfs_block_group_item *bi;
10979         struct btrfs_block_group_item bg_item;
10980         struct btrfs_path path;
10981         struct btrfs_key bg_key;
10982         struct btrfs_key chunk_key;
10983         struct btrfs_key extent_key;
10984         struct btrfs_chunk *chunk;
10985         struct extent_buffer *leaf;
10986         struct btrfs_extent_item *ei;
10987         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10988         u64 flags;
10989         u64 bg_flags;
10990         u64 used;
10991         u64 total = 0;
10992         int ret;
10993         int err = 0;
10994
10995         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10996         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10997         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10998         used = btrfs_block_group_used(&bg_item);
10999         bg_flags = btrfs_block_group_flags(&bg_item);
11000
11001         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11002         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11003         chunk_key.offset = bg_key.objectid;
11004
11005         btrfs_init_path(&path);
11006         /* Search for the referencer chunk */
11007         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11008         if (ret) {
11009                 error(
11010                 "block group[%llu %llu] did not find the related chunk item",
11011                         bg_key.objectid, bg_key.offset);
11012                 err |= REFERENCER_MISSING;
11013         } else {
11014                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11015                                         struct btrfs_chunk);
11016                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11017                                                 bg_key.offset) {
11018                         error(
11019         "block group[%llu %llu] related chunk item length does not match",
11020                                 bg_key.objectid, bg_key.offset);
11021                         err |= REFERENCER_MISMATCH;
11022                 }
11023         }
11024         btrfs_release_path(&path);
11025
11026         /* Search from the block group bytenr */
11027         extent_key.objectid = bg_key.objectid;
11028         extent_key.type = 0;
11029         extent_key.offset = 0;
11030
11031         btrfs_init_path(&path);
11032         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11033         if (ret < 0)
11034                 goto out;
11035
11036         /* Iterate extent tree to account used space */
11037         while (1) {
11038                 leaf = path.nodes[0];
11039
11040                 /* Search slot can point to the last item beyond leaf nritems */
11041                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11042                         goto next;
11043
11044                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11045                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11046                         break;
11047
11048                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11049                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11050                         goto next;
11051                 if (extent_key.objectid < bg_key.objectid)
11052                         goto next;
11053
11054                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11055                         total += nodesize;
11056                 else
11057                         total += extent_key.offset;
11058
11059                 ei = btrfs_item_ptr(leaf, path.slots[0],
11060                                     struct btrfs_extent_item);
11061                 flags = btrfs_extent_flags(leaf, ei);
11062                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11063                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11064                                 error(
11065                         "bad extent[%llu, %llu) type mismatch with chunk",
11066                                         extent_key.objectid,
11067                                         extent_key.objectid + extent_key.offset);
11068                                 err |= CHUNK_TYPE_MISMATCH;
11069                         }
11070                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11071                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11072                                     BTRFS_BLOCK_GROUP_METADATA))) {
11073                                 error(
11074                         "bad extent[%llu, %llu) type mismatch with chunk",
11075                                         extent_key.objectid,
11076                                         extent_key.objectid + nodesize);
11077                                 err |= CHUNK_TYPE_MISMATCH;
11078                         }
11079                 }
11080 next:
11081                 ret = btrfs_next_item(extent_root, &path);
11082                 if (ret)
11083                         break;
11084         }
11085
11086 out:
11087         btrfs_release_path(&path);
11088
11089         if (total != used) {
11090                 error(
11091                 "block group[%llu %llu] used %llu but extent items used %llu",
11092                         bg_key.objectid, bg_key.offset, used, total);
11093                 err |= ACCOUNTING_MISMATCH;
11094         }
11095         return err;
11096 }
11097
11098 /*
11099  * Check a chunk item.
11100  * Including checking all referred dev_extents and block group
11101  */
11102 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11103                             struct extent_buffer *eb, int slot)
11104 {
11105         struct btrfs_root *extent_root = fs_info->extent_root;
11106         struct btrfs_root *dev_root = fs_info->dev_root;
11107         struct btrfs_path path;
11108         struct btrfs_key chunk_key;
11109         struct btrfs_key bg_key;
11110         struct btrfs_key devext_key;
11111         struct btrfs_chunk *chunk;
11112         struct extent_buffer *leaf;
11113         struct btrfs_block_group_item *bi;
11114         struct btrfs_block_group_item bg_item;
11115         struct btrfs_dev_extent *ptr;
11116         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11117         u64 length;
11118         u64 chunk_end;
11119         u64 type;
11120         u64 profile;
11121         int num_stripes;
11122         u64 offset;
11123         u64 objectid;
11124         int i;
11125         int ret;
11126         int err = 0;
11127
11128         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11129         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11130         length = btrfs_chunk_length(eb, chunk);
11131         chunk_end = chunk_key.offset + length;
11132         if (!IS_ALIGNED(length, sectorsize)) {
11133                 error("chunk[%llu %llu) not aligned to %u",
11134                         chunk_key.offset, chunk_end, sectorsize);
11135                 err |= BYTES_UNALIGNED;
11136                 goto out;
11137         }
11138
11139         type = btrfs_chunk_type(eb, chunk);
11140         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11141         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11142                 error("chunk[%llu %llu) has no chunk type",
11143                         chunk_key.offset, chunk_end);
11144                 err |= UNKNOWN_TYPE;
11145         }
11146         if (profile && (profile & (profile - 1))) {
11147                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11148                         chunk_key.offset, chunk_end, profile);
11149                 err |= UNKNOWN_TYPE;
11150         }
11151
11152         bg_key.objectid = chunk_key.offset;
11153         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11154         bg_key.offset = length;
11155
11156         btrfs_init_path(&path);
11157         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11158         if (ret) {
11159                 error(
11160                 "chunk[%llu %llu) did not find the related block group item",
11161                         chunk_key.offset, chunk_end);
11162                 err |= REFERENCER_MISSING;
11163         } else{
11164                 leaf = path.nodes[0];
11165                 bi = btrfs_item_ptr(leaf, path.slots[0],
11166                                     struct btrfs_block_group_item);
11167                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11168                                    sizeof(bg_item));
11169                 if (btrfs_block_group_flags(&bg_item) != type) {
11170                         error(
11171 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11172                                 chunk_key.offset, chunk_end, type,
11173                                 btrfs_block_group_flags(&bg_item));
11174                         err |= REFERENCER_MISSING;
11175                 }
11176         }
11177
11178         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11179         for (i = 0; i < num_stripes; i++) {
11180                 btrfs_release_path(&path);
11181                 btrfs_init_path(&path);
11182                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11183                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11184                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11185
11186                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11187                                         0, 0);
11188                 if (ret)
11189                         goto not_match_dev;
11190
11191                 leaf = path.nodes[0];
11192                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11193                                      struct btrfs_dev_extent);
11194                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11195                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11196                 if (objectid != chunk_key.objectid ||
11197                     offset != chunk_key.offset ||
11198                     btrfs_dev_extent_length(leaf, ptr) != length)
11199                         goto not_match_dev;
11200                 continue;
11201 not_match_dev:
11202                 err |= BACKREF_MISSING;
11203                 error(
11204                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11205                         chunk_key.objectid, chunk_end, i);
11206                 continue;
11207         }
11208         btrfs_release_path(&path);
11209 out:
11210         return err;
11211 }
11212
11213 /*
11214  * Main entry function to check known items and update related accounting info
11215  */
11216 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11217 {
11218         struct btrfs_fs_info *fs_info = root->fs_info;
11219         struct btrfs_key key;
11220         int slot = 0;
11221         int type;
11222         struct btrfs_extent_data_ref *dref;
11223         int ret;
11224         int err = 0;
11225
11226 next:
11227         btrfs_item_key_to_cpu(eb, &key, slot);
11228         type = key.type;
11229
11230         switch (type) {
11231         case BTRFS_EXTENT_DATA_KEY:
11232                 ret = check_extent_data_item(root, eb, slot);
11233                 err |= ret;
11234                 break;
11235         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11236                 ret = check_block_group_item(fs_info, eb, slot);
11237                 err |= ret;
11238                 break;
11239         case BTRFS_DEV_ITEM_KEY:
11240                 ret = check_dev_item(fs_info, eb, slot);
11241                 err |= ret;
11242                 break;
11243         case BTRFS_CHUNK_ITEM_KEY:
11244                 ret = check_chunk_item(fs_info, eb, slot);
11245                 err |= ret;
11246                 break;
11247         case BTRFS_DEV_EXTENT_KEY:
11248                 ret = check_dev_extent_item(fs_info, eb, slot);
11249                 err |= ret;
11250                 break;
11251         case BTRFS_EXTENT_ITEM_KEY:
11252         case BTRFS_METADATA_ITEM_KEY:
11253                 ret = check_extent_item(fs_info, eb, slot);
11254                 err |= ret;
11255                 break;
11256         case BTRFS_EXTENT_CSUM_KEY:
11257                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11258                 break;
11259         case BTRFS_TREE_BLOCK_REF_KEY:
11260                 ret = check_tree_block_backref(fs_info, key.offset,
11261                                                key.objectid, -1);
11262                 err |= ret;
11263                 break;
11264         case BTRFS_EXTENT_DATA_REF_KEY:
11265                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11266                 ret = check_extent_data_backref(fs_info,
11267                                 btrfs_extent_data_ref_root(eb, dref),
11268                                 btrfs_extent_data_ref_objectid(eb, dref),
11269                                 btrfs_extent_data_ref_offset(eb, dref),
11270                                 key.objectid, 0,
11271                                 btrfs_extent_data_ref_count(eb, dref));
11272                 err |= ret;
11273                 break;
11274         case BTRFS_SHARED_BLOCK_REF_KEY:
11275                 ret = check_shared_block_backref(fs_info, key.offset,
11276                                                  key.objectid, -1);
11277                 err |= ret;
11278                 break;
11279         case BTRFS_SHARED_DATA_REF_KEY:
11280                 ret = check_shared_data_backref(fs_info, key.offset,
11281                                                 key.objectid);
11282                 err |= ret;
11283                 break;
11284         default:
11285                 break;
11286         }
11287
11288         if (++slot < btrfs_header_nritems(eb))
11289                 goto next;
11290
11291         return err;
11292 }
11293
11294 /*
11295  * Helper function for later fs/subvol tree check.  To determine if a tree
11296  * block should be checked.
11297  * This function will ensure only the direct referencer with lowest rootid to
11298  * check a fs/subvolume tree block.
11299  *
11300  * Backref check at extent tree would detect errors like missing subvolume
11301  * tree, so we can do aggressive check to reduce duplicated checks.
11302  */
11303 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11304 {
11305         struct btrfs_root *extent_root = root->fs_info->extent_root;
11306         struct btrfs_key key;
11307         struct btrfs_path path;
11308         struct extent_buffer *leaf;
11309         int slot;
11310         struct btrfs_extent_item *ei;
11311         unsigned long ptr;
11312         unsigned long end;
11313         int type;
11314         u32 item_size;
11315         u64 offset;
11316         struct btrfs_extent_inline_ref *iref;
11317         int ret;
11318
11319         btrfs_init_path(&path);
11320         key.objectid = btrfs_header_bytenr(eb);
11321         key.type = BTRFS_METADATA_ITEM_KEY;
11322         key.offset = (u64)-1;
11323
11324         /*
11325          * Any failure in backref resolving means we can't determine
11326          * whom the tree block belongs to.
11327          * So in that case, we need to check that tree block
11328          */
11329         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11330         if (ret < 0)
11331                 goto need_check;
11332
11333         ret = btrfs_previous_extent_item(extent_root, &path,
11334                                          btrfs_header_bytenr(eb));
11335         if (ret)
11336                 goto need_check;
11337
11338         leaf = path.nodes[0];
11339         slot = path.slots[0];
11340         btrfs_item_key_to_cpu(leaf, &key, slot);
11341         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11342
11343         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11344                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11345         } else {
11346                 struct btrfs_tree_block_info *info;
11347
11348                 info = (struct btrfs_tree_block_info *)(ei + 1);
11349                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11350         }
11351
11352         item_size = btrfs_item_size_nr(leaf, slot);
11353         ptr = (unsigned long)iref;
11354         end = (unsigned long)ei + item_size;
11355         while (ptr < end) {
11356                 iref = (struct btrfs_extent_inline_ref *)ptr;
11357                 type = btrfs_extent_inline_ref_type(leaf, iref);
11358                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11359
11360                 /*
11361                  * We only check the tree block if current root is
11362                  * the lowest referencer of it.
11363                  */
11364                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11365                     offset < root->objectid) {
11366                         btrfs_release_path(&path);
11367                         return 0;
11368                 }
11369
11370                 ptr += btrfs_extent_inline_ref_size(type);
11371         }
11372         /*
11373          * Normally we should also check keyed tree block ref, but that may be
11374          * very time consuming.  Inlined ref should already make us skip a lot
11375          * of refs now.  So skip search keyed tree block ref.
11376          */
11377
11378 need_check:
11379         btrfs_release_path(&path);
11380         return 1;
11381 }
11382
11383 /*
11384  * Traversal function for tree block. We will do:
11385  * 1) Skip shared fs/subvolume tree blocks
11386  * 2) Update related bytes accounting
11387  * 3) Pre-order traversal
11388  */
11389 static int traverse_tree_block(struct btrfs_root *root,
11390                                 struct extent_buffer *node)
11391 {
11392         struct extent_buffer *eb;
11393         struct btrfs_key key;
11394         struct btrfs_key drop_key;
11395         int level;
11396         u64 nr;
11397         int i;
11398         int err = 0;
11399         int ret;
11400
11401         /*
11402          * Skip shared fs/subvolume tree block, in that case they will
11403          * be checked by referencer with lowest rootid
11404          */
11405         if (is_fstree(root->objectid) && !should_check(root, node))
11406                 return 0;
11407
11408         /* Update bytes accounting */
11409         total_btree_bytes += node->len;
11410         if (fs_root_objectid(btrfs_header_owner(node)))
11411                 total_fs_tree_bytes += node->len;
11412         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11413                 total_extent_tree_bytes += node->len;
11414         if (!found_old_backref &&
11415             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11416             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11417             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11418                 found_old_backref = 1;
11419
11420         /* pre-order tranversal, check itself first */
11421         level = btrfs_header_level(node);
11422         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11423                                    btrfs_header_level(node),
11424                                    btrfs_header_owner(node));
11425         err |= ret;
11426         if (err)
11427                 error(
11428         "check %s failed root %llu bytenr %llu level %d, force continue check",
11429                         level ? "node":"leaf", root->objectid,
11430                         btrfs_header_bytenr(node), btrfs_header_level(node));
11431
11432         if (!level) {
11433                 btree_space_waste += btrfs_leaf_free_space(root, node);
11434                 ret = check_leaf_items(root, node);
11435                 err |= ret;
11436                 return err;
11437         }
11438
11439         nr = btrfs_header_nritems(node);
11440         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11441         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11442                 sizeof(struct btrfs_key_ptr);
11443
11444         /* Then check all its children */
11445         for (i = 0; i < nr; i++) {
11446                 u64 blocknr = btrfs_node_blockptr(node, i);
11447
11448                 btrfs_node_key_to_cpu(node, &key, i);
11449                 if (level == root->root_item.drop_level &&
11450                     is_dropped_key(&key, &drop_key))
11451                         continue;
11452
11453                 /*
11454                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11455                  * to call the function itself.
11456                  */
11457                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11458                 if (extent_buffer_uptodate(eb)) {
11459                         ret = traverse_tree_block(root, eb);
11460                         err |= ret;
11461                 }
11462                 free_extent_buffer(eb);
11463         }
11464
11465         return err;
11466 }
11467
11468 /*
11469  * Low memory usage version check_chunks_and_extents.
11470  */
11471 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11472 {
11473         struct btrfs_path path;
11474         struct btrfs_key key;
11475         struct btrfs_root *root1;
11476         struct btrfs_root *cur_root;
11477         int err = 0;
11478         int ret;
11479
11480         root1 = root->fs_info->chunk_root;
11481         ret = traverse_tree_block(root1, root1->node);
11482         err |= ret;
11483
11484         root1 = root->fs_info->tree_root;
11485         ret = traverse_tree_block(root1, root1->node);
11486         err |= ret;
11487
11488         btrfs_init_path(&path);
11489         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11490         key.offset = 0;
11491         key.type = BTRFS_ROOT_ITEM_KEY;
11492
11493         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11494         if (ret) {
11495                 error("cannot find extent treet in tree_root");
11496                 goto out;
11497         }
11498
11499         while (1) {
11500                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11501                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11502                         goto next;
11503                 key.offset = (u64)-1;
11504
11505                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11506                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11507                                         &key);
11508                 else
11509                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11510                 if (IS_ERR(cur_root) || !cur_root) {
11511                         error("failed to read tree: %lld", key.objectid);
11512                         goto next;
11513                 }
11514
11515                 ret = traverse_tree_block(cur_root, cur_root->node);
11516                 err |= ret;
11517
11518                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11519                         btrfs_free_fs_root(cur_root);
11520 next:
11521                 ret = btrfs_next_item(root1, &path);
11522                 if (ret)
11523                         goto out;
11524         }
11525
11526 out:
11527         btrfs_release_path(&path);
11528         return err;
11529 }
11530
11531 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11532                            struct btrfs_root *root, int overwrite)
11533 {
11534         struct extent_buffer *c;
11535         struct extent_buffer *old = root->node;
11536         int level;
11537         int ret;
11538         struct btrfs_disk_key disk_key = {0,0,0};
11539
11540         level = 0;
11541
11542         if (overwrite) {
11543                 c = old;
11544                 extent_buffer_get(c);
11545                 goto init;
11546         }
11547         c = btrfs_alloc_free_block(trans, root,
11548                                    root->nodesize,
11549                                    root->root_key.objectid,
11550                                    &disk_key, level, 0, 0);
11551         if (IS_ERR(c)) {
11552                 c = old;
11553                 extent_buffer_get(c);
11554                 overwrite = 1;
11555         }
11556 init:
11557         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11558         btrfs_set_header_level(c, level);
11559         btrfs_set_header_bytenr(c, c->start);
11560         btrfs_set_header_generation(c, trans->transid);
11561         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11562         btrfs_set_header_owner(c, root->root_key.objectid);
11563
11564         write_extent_buffer(c, root->fs_info->fsid,
11565                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11566
11567         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11568                             btrfs_header_chunk_tree_uuid(c),
11569                             BTRFS_UUID_SIZE);
11570
11571         btrfs_mark_buffer_dirty(c);
11572         /*
11573          * this case can happen in the following case:
11574          *
11575          * 1.overwrite previous root.
11576          *
11577          * 2.reinit reloc data root, this is because we skip pin
11578          * down reloc data tree before which means we can allocate
11579          * same block bytenr here.
11580          */
11581         if (old->start == c->start) {
11582                 btrfs_set_root_generation(&root->root_item,
11583                                           trans->transid);
11584                 root->root_item.level = btrfs_header_level(root->node);
11585                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11586                                         &root->root_key, &root->root_item);
11587                 if (ret) {
11588                         free_extent_buffer(c);
11589                         return ret;
11590                 }
11591         }
11592         free_extent_buffer(old);
11593         root->node = c;
11594         add_root_to_dirty_list(root);
11595         return 0;
11596 }
11597
11598 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11599                                 struct extent_buffer *eb, int tree_root)
11600 {
11601         struct extent_buffer *tmp;
11602         struct btrfs_root_item *ri;
11603         struct btrfs_key key;
11604         u64 bytenr;
11605         u32 nodesize;
11606         int level = btrfs_header_level(eb);
11607         int nritems;
11608         int ret;
11609         int i;
11610
11611         /*
11612          * If we have pinned this block before, don't pin it again.
11613          * This can not only avoid forever loop with broken filesystem
11614          * but also give us some speedups.
11615          */
11616         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11617                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11618                 return 0;
11619
11620         btrfs_pin_extent(fs_info, eb->start, eb->len);
11621
11622         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11623         nritems = btrfs_header_nritems(eb);
11624         for (i = 0; i < nritems; i++) {
11625                 if (level == 0) {
11626                         btrfs_item_key_to_cpu(eb, &key, i);
11627                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11628                                 continue;
11629                         /* Skip the extent root and reloc roots */
11630                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11631                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11632                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11633                                 continue;
11634                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11635                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11636
11637                         /*
11638                          * If at any point we start needing the real root we
11639                          * will have to build a stump root for the root we are
11640                          * in, but for now this doesn't actually use the root so
11641                          * just pass in extent_root.
11642                          */
11643                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11644                                               nodesize, 0);
11645                         if (!extent_buffer_uptodate(tmp)) {
11646                                 fprintf(stderr, "Error reading root block\n");
11647                                 return -EIO;
11648                         }
11649                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11650                         free_extent_buffer(tmp);
11651                         if (ret)
11652                                 return ret;
11653                 } else {
11654                         bytenr = btrfs_node_blockptr(eb, i);
11655
11656                         /* If we aren't the tree root don't read the block */
11657                         if (level == 1 && !tree_root) {
11658                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11659                                 continue;
11660                         }
11661
11662                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11663                                               nodesize, 0);
11664                         if (!extent_buffer_uptodate(tmp)) {
11665                                 fprintf(stderr, "Error reading tree block\n");
11666                                 return -EIO;
11667                         }
11668                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11669                         free_extent_buffer(tmp);
11670                         if (ret)
11671                                 return ret;
11672                 }
11673         }
11674
11675         return 0;
11676 }
11677
11678 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11679 {
11680         int ret;
11681
11682         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11683         if (ret)
11684                 return ret;
11685
11686         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11687 }
11688
11689 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11690 {
11691         struct btrfs_block_group_cache *cache;
11692         struct btrfs_path path;
11693         struct extent_buffer *leaf;
11694         struct btrfs_chunk *chunk;
11695         struct btrfs_key key;
11696         int ret;
11697         u64 start;
11698
11699         btrfs_init_path(&path);
11700         key.objectid = 0;
11701         key.type = BTRFS_CHUNK_ITEM_KEY;
11702         key.offset = 0;
11703         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11704         if (ret < 0) {
11705                 btrfs_release_path(&path);
11706                 return ret;
11707         }
11708
11709         /*
11710          * We do this in case the block groups were screwed up and had alloc
11711          * bits that aren't actually set on the chunks.  This happens with
11712          * restored images every time and could happen in real life I guess.
11713          */
11714         fs_info->avail_data_alloc_bits = 0;
11715         fs_info->avail_metadata_alloc_bits = 0;
11716         fs_info->avail_system_alloc_bits = 0;
11717
11718         /* First we need to create the in-memory block groups */
11719         while (1) {
11720                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11721                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11722                         if (ret < 0) {
11723                                 btrfs_release_path(&path);
11724                                 return ret;
11725                         }
11726                         if (ret) {
11727                                 ret = 0;
11728                                 break;
11729                         }
11730                 }
11731                 leaf = path.nodes[0];
11732                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11733                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11734                         path.slots[0]++;
11735                         continue;
11736                 }
11737
11738                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11739                 btrfs_add_block_group(fs_info, 0,
11740                                       btrfs_chunk_type(leaf, chunk),
11741                                       key.objectid, key.offset,
11742                                       btrfs_chunk_length(leaf, chunk));
11743                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11744                                  key.offset + btrfs_chunk_length(leaf, chunk));
11745                 path.slots[0]++;
11746         }
11747         start = 0;
11748         while (1) {
11749                 cache = btrfs_lookup_first_block_group(fs_info, start);
11750                 if (!cache)
11751                         break;
11752                 cache->cached = 1;
11753                 start = cache->key.objectid + cache->key.offset;
11754         }
11755
11756         btrfs_release_path(&path);
11757         return 0;
11758 }
11759
11760 static int reset_balance(struct btrfs_trans_handle *trans,
11761                          struct btrfs_fs_info *fs_info)
11762 {
11763         struct btrfs_root *root = fs_info->tree_root;
11764         struct btrfs_path path;
11765         struct extent_buffer *leaf;
11766         struct btrfs_key key;
11767         int del_slot, del_nr = 0;
11768         int ret;
11769         int found = 0;
11770
11771         btrfs_init_path(&path);
11772         key.objectid = BTRFS_BALANCE_OBJECTID;
11773         key.type = BTRFS_BALANCE_ITEM_KEY;
11774         key.offset = 0;
11775         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11776         if (ret) {
11777                 if (ret > 0)
11778                         ret = 0;
11779                 if (!ret)
11780                         goto reinit_data_reloc;
11781                 else
11782                         goto out;
11783         }
11784
11785         ret = btrfs_del_item(trans, root, &path);
11786         if (ret)
11787                 goto out;
11788         btrfs_release_path(&path);
11789
11790         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11791         key.type = BTRFS_ROOT_ITEM_KEY;
11792         key.offset = 0;
11793         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11794         if (ret < 0)
11795                 goto out;
11796         while (1) {
11797                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11798                         if (!found)
11799                                 break;
11800
11801                         if (del_nr) {
11802                                 ret = btrfs_del_items(trans, root, &path,
11803                                                       del_slot, del_nr);
11804                                 del_nr = 0;
11805                                 if (ret)
11806                                         goto out;
11807                         }
11808                         key.offset++;
11809                         btrfs_release_path(&path);
11810
11811                         found = 0;
11812                         ret = btrfs_search_slot(trans, root, &key, &path,
11813                                                 -1, 1);
11814                         if (ret < 0)
11815                                 goto out;
11816                         continue;
11817                 }
11818                 found = 1;
11819                 leaf = path.nodes[0];
11820                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11821                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11822                         break;
11823                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11824                         path.slots[0]++;
11825                         continue;
11826                 }
11827                 if (!del_nr) {
11828                         del_slot = path.slots[0];
11829                         del_nr = 1;
11830                 } else {
11831                         del_nr++;
11832                 }
11833                 path.slots[0]++;
11834         }
11835
11836         if (del_nr) {
11837                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11838                 if (ret)
11839                         goto out;
11840         }
11841         btrfs_release_path(&path);
11842
11843 reinit_data_reloc:
11844         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11845         key.type = BTRFS_ROOT_ITEM_KEY;
11846         key.offset = (u64)-1;
11847         root = btrfs_read_fs_root(fs_info, &key);
11848         if (IS_ERR(root)) {
11849                 fprintf(stderr, "Error reading data reloc tree\n");
11850                 ret = PTR_ERR(root);
11851                 goto out;
11852         }
11853         record_root_in_trans(trans, root);
11854         ret = btrfs_fsck_reinit_root(trans, root, 0);
11855         if (ret)
11856                 goto out;
11857         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11858 out:
11859         btrfs_release_path(&path);
11860         return ret;
11861 }
11862
11863 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11864                               struct btrfs_fs_info *fs_info)
11865 {
11866         u64 start = 0;
11867         int ret;
11868
11869         /*
11870          * The only reason we don't do this is because right now we're just
11871          * walking the trees we find and pinning down their bytes, we don't look
11872          * at any of the leaves.  In order to do mixed groups we'd have to check
11873          * the leaves of any fs roots and pin down the bytes for any file
11874          * extents we find.  Not hard but why do it if we don't have to?
11875          */
11876         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11877                 fprintf(stderr, "We don't support re-initing the extent tree "
11878                         "for mixed block groups yet, please notify a btrfs "
11879                         "developer you want to do this so they can add this "
11880                         "functionality.\n");
11881                 return -EINVAL;
11882         }
11883
11884         /*
11885          * first we need to walk all of the trees except the extent tree and pin
11886          * down the bytes that are in use so we don't overwrite any existing
11887          * metadata.
11888          */
11889         ret = pin_metadata_blocks(fs_info);
11890         if (ret) {
11891                 fprintf(stderr, "error pinning down used bytes\n");
11892                 return ret;
11893         }
11894
11895         /*
11896          * Need to drop all the block groups since we're going to recreate all
11897          * of them again.
11898          */
11899         btrfs_free_block_groups(fs_info);
11900         ret = reset_block_groups(fs_info);
11901         if (ret) {
11902                 fprintf(stderr, "error resetting the block groups\n");
11903                 return ret;
11904         }
11905
11906         /* Ok we can allocate now, reinit the extent root */
11907         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11908         if (ret) {
11909                 fprintf(stderr, "extent root initialization failed\n");
11910                 /*
11911                  * When the transaction code is updated we should end the
11912                  * transaction, but for now progs only knows about commit so
11913                  * just return an error.
11914                  */
11915                 return ret;
11916         }
11917
11918         /*
11919          * Now we have all the in-memory block groups setup so we can make
11920          * allocations properly, and the metadata we care about is safe since we
11921          * pinned all of it above.
11922          */
11923         while (1) {
11924                 struct btrfs_block_group_cache *cache;
11925
11926                 cache = btrfs_lookup_first_block_group(fs_info, start);
11927                 if (!cache)
11928                         break;
11929                 start = cache->key.objectid + cache->key.offset;
11930                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11931                                         &cache->key, &cache->item,
11932                                         sizeof(cache->item));
11933                 if (ret) {
11934                         fprintf(stderr, "Error adding block group\n");
11935                         return ret;
11936                 }
11937                 btrfs_extent_post_op(trans, fs_info->extent_root);
11938         }
11939
11940         ret = reset_balance(trans, fs_info);
11941         if (ret)
11942                 fprintf(stderr, "error resetting the pending balance\n");
11943
11944         return ret;
11945 }
11946
11947 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11948 {
11949         struct btrfs_path path;
11950         struct btrfs_trans_handle *trans;
11951         struct btrfs_key key;
11952         int ret;
11953
11954         printf("Recowing metadata block %llu\n", eb->start);
11955         key.objectid = btrfs_header_owner(eb);
11956         key.type = BTRFS_ROOT_ITEM_KEY;
11957         key.offset = (u64)-1;
11958
11959         root = btrfs_read_fs_root(root->fs_info, &key);
11960         if (IS_ERR(root)) {
11961                 fprintf(stderr, "Couldn't find owner root %llu\n",
11962                         key.objectid);
11963                 return PTR_ERR(root);
11964         }
11965
11966         trans = btrfs_start_transaction(root, 1);
11967         if (IS_ERR(trans))
11968                 return PTR_ERR(trans);
11969
11970         btrfs_init_path(&path);
11971         path.lowest_level = btrfs_header_level(eb);
11972         if (path.lowest_level)
11973                 btrfs_node_key_to_cpu(eb, &key, 0);
11974         else
11975                 btrfs_item_key_to_cpu(eb, &key, 0);
11976
11977         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11978         btrfs_commit_transaction(trans, root);
11979         btrfs_release_path(&path);
11980         return ret;
11981 }
11982
11983 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11984 {
11985         struct btrfs_path path;
11986         struct btrfs_trans_handle *trans;
11987         struct btrfs_key key;
11988         int ret;
11989
11990         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11991                bad->key.type, bad->key.offset);
11992         key.objectid = bad->root_id;
11993         key.type = BTRFS_ROOT_ITEM_KEY;
11994         key.offset = (u64)-1;
11995
11996         root = btrfs_read_fs_root(root->fs_info, &key);
11997         if (IS_ERR(root)) {
11998                 fprintf(stderr, "Couldn't find owner root %llu\n",
11999                         key.objectid);
12000                 return PTR_ERR(root);
12001         }
12002
12003         trans = btrfs_start_transaction(root, 1);
12004         if (IS_ERR(trans))
12005                 return PTR_ERR(trans);
12006
12007         btrfs_init_path(&path);
12008         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12009         if (ret) {
12010                 if (ret > 0)
12011                         ret = 0;
12012                 goto out;
12013         }
12014         ret = btrfs_del_item(trans, root, &path);
12015 out:
12016         btrfs_commit_transaction(trans, root);
12017         btrfs_release_path(&path);
12018         return ret;
12019 }
12020
12021 static int zero_log_tree(struct btrfs_root *root)
12022 {
12023         struct btrfs_trans_handle *trans;
12024         int ret;
12025
12026         trans = btrfs_start_transaction(root, 1);
12027         if (IS_ERR(trans)) {
12028                 ret = PTR_ERR(trans);
12029                 return ret;
12030         }
12031         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12032         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12033         ret = btrfs_commit_transaction(trans, root);
12034         return ret;
12035 }
12036
12037 static int populate_csum(struct btrfs_trans_handle *trans,
12038                          struct btrfs_root *csum_root, char *buf, u64 start,
12039                          u64 len)
12040 {
12041         u64 offset = 0;
12042         u64 sectorsize;
12043         int ret = 0;
12044
12045         while (offset < len) {
12046                 sectorsize = csum_root->sectorsize;
12047                 ret = read_extent_data(csum_root, buf, start + offset,
12048                                        &sectorsize, 0);
12049                 if (ret)
12050                         break;
12051                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12052                                             start + offset, buf, sectorsize);
12053                 if (ret)
12054                         break;
12055                 offset += sectorsize;
12056         }
12057         return ret;
12058 }
12059
12060 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12061                                       struct btrfs_root *csum_root,
12062                                       struct btrfs_root *cur_root)
12063 {
12064         struct btrfs_path path;
12065         struct btrfs_key key;
12066         struct extent_buffer *node;
12067         struct btrfs_file_extent_item *fi;
12068         char *buf = NULL;
12069         u64 start = 0;
12070         u64 len = 0;
12071         int slot = 0;
12072         int ret = 0;
12073
12074         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12075         if (!buf)
12076                 return -ENOMEM;
12077
12078         btrfs_init_path(&path);
12079         key.objectid = 0;
12080         key.offset = 0;
12081         key.type = 0;
12082         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12083         if (ret < 0)
12084                 goto out;
12085         /* Iterate all regular file extents and fill its csum */
12086         while (1) {
12087                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12088
12089                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12090                         goto next;
12091                 node = path.nodes[0];
12092                 slot = path.slots[0];
12093                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12094                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12095                         goto next;
12096                 start = btrfs_file_extent_disk_bytenr(node, fi);
12097                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12098
12099                 ret = populate_csum(trans, csum_root, buf, start, len);
12100                 if (ret == -EEXIST)
12101                         ret = 0;
12102                 if (ret < 0)
12103                         goto out;
12104 next:
12105                 /*
12106                  * TODO: if next leaf is corrupted, jump to nearest next valid
12107                  * leaf.
12108                  */
12109                 ret = btrfs_next_item(cur_root, &path);
12110                 if (ret < 0)
12111                         goto out;
12112                 if (ret > 0) {
12113                         ret = 0;
12114                         goto out;
12115                 }
12116         }
12117
12118 out:
12119         btrfs_release_path(&path);
12120         free(buf);
12121         return ret;
12122 }
12123
12124 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12125                                   struct btrfs_root *csum_root)
12126 {
12127         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12128         struct btrfs_path path;
12129         struct btrfs_root *tree_root = fs_info->tree_root;
12130         struct btrfs_root *cur_root;
12131         struct extent_buffer *node;
12132         struct btrfs_key key;
12133         int slot = 0;
12134         int ret = 0;
12135
12136         btrfs_init_path(&path);
12137         key.objectid = BTRFS_FS_TREE_OBJECTID;
12138         key.offset = 0;
12139         key.type = BTRFS_ROOT_ITEM_KEY;
12140         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12141         if (ret < 0)
12142                 goto out;
12143         if (ret > 0) {
12144                 ret = -ENOENT;
12145                 goto out;
12146         }
12147
12148         while (1) {
12149                 node = path.nodes[0];
12150                 slot = path.slots[0];
12151                 btrfs_item_key_to_cpu(node, &key, slot);
12152                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12153                         goto out;
12154                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12155                         goto next;
12156                 if (!is_fstree(key.objectid))
12157                         goto next;
12158                 key.offset = (u64)-1;
12159
12160                 cur_root = btrfs_read_fs_root(fs_info, &key);
12161                 if (IS_ERR(cur_root) || !cur_root) {
12162                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12163                                 key.objectid);
12164                         goto out;
12165                 }
12166                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12167                                 cur_root);
12168                 if (ret < 0)
12169                         goto out;
12170 next:
12171                 ret = btrfs_next_item(tree_root, &path);
12172                 if (ret > 0) {
12173                         ret = 0;
12174                         goto out;
12175                 }
12176                 if (ret < 0)
12177                         goto out;
12178         }
12179
12180 out:
12181         btrfs_release_path(&path);
12182         return ret;
12183 }
12184
12185 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12186                                       struct btrfs_root *csum_root)
12187 {
12188         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12189         struct btrfs_path path;
12190         struct btrfs_extent_item *ei;
12191         struct extent_buffer *leaf;
12192         char *buf;
12193         struct btrfs_key key;
12194         int ret;
12195
12196         btrfs_init_path(&path);
12197         key.objectid = 0;
12198         key.type = BTRFS_EXTENT_ITEM_KEY;
12199         key.offset = 0;
12200         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12201         if (ret < 0) {
12202                 btrfs_release_path(&path);
12203                 return ret;
12204         }
12205
12206         buf = malloc(csum_root->sectorsize);
12207         if (!buf) {
12208                 btrfs_release_path(&path);
12209                 return -ENOMEM;
12210         }
12211
12212         while (1) {
12213                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12214                         ret = btrfs_next_leaf(extent_root, &path);
12215                         if (ret < 0)
12216                                 break;
12217                         if (ret) {
12218                                 ret = 0;
12219                                 break;
12220                         }
12221                 }
12222                 leaf = path.nodes[0];
12223
12224                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12225                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12226                         path.slots[0]++;
12227                         continue;
12228                 }
12229
12230                 ei = btrfs_item_ptr(leaf, path.slots[0],
12231                                     struct btrfs_extent_item);
12232                 if (!(btrfs_extent_flags(leaf, ei) &
12233                       BTRFS_EXTENT_FLAG_DATA)) {
12234                         path.slots[0]++;
12235                         continue;
12236                 }
12237
12238                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12239                                     key.offset);
12240                 if (ret)
12241                         break;
12242                 path.slots[0]++;
12243         }
12244
12245         btrfs_release_path(&path);
12246         free(buf);
12247         return ret;
12248 }
12249
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * When @search_fs_tree is non-zero the fs/subvolume trees are used as the
 * source of data extents; this is needed after an extent tree init, which
 * wipes out all extent info so the extent tree cannot be relied on.
 * Otherwise the extent tree is used.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
12266
12267 static void free_roots_info_cache(void)
12268 {
12269         if (!roots_info_cache)
12270                 return;
12271
12272         while (!cache_tree_empty(roots_info_cache)) {
12273                 struct cache_extent *entry;
12274                 struct root_item_info *rii;
12275
12276                 entry = first_cache_extent(roots_info_cache);
12277                 if (!entry)
12278                         break;
12279                 remove_cache_extent(roots_info_cache, entry);
12280                 rii = container_of(entry, struct root_item_info, cache_extent);
12281                 free(rii);
12282         }
12283
12284         free(roots_info_cache);
12285         roots_info_cache = NULL;
12286 }
12287
/*
 * Scan the extent tree and record, per root id, the highest-level tree block
 * that references that root, in the global roots_info_cache.
 *
 * Only EXTENT_ITEMs flagged as tree blocks and METADATA_ITEMs are considered,
 * and only when their first inline ref is of type BTRFS_TREE_BLOCK_REF_KEY.
 * For each root id the cache entry keeps the level, bytenr and generation of
 * the highest-level block seen, plus the number of blocks seen at that level.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or a negative error
 * from the tree walk.  On failure the partially built cache is left in place;
 * this function does not free it.
 */
static int build_roots_info_cache(struct btrfs_fs_info *info)
{
        int ret = 0;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_path path;

        /* Allocate the global cache on first use */
        if (!roots_info_cache) {
                roots_info_cache = malloc(sizeof(*roots_info_cache));
                if (!roots_info_cache)
                        return -ENOMEM;
                cache_tree_init(roots_info_cache);
        }

        btrfs_init_path(&path);
        key.objectid = 0;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
        if (ret < 0)
                goto out;
        leaf = path.nodes[0];

        while (1) {
                struct btrfs_key found_key;
                struct btrfs_extent_item *ei;
                struct btrfs_extent_inline_ref *iref;
                int slot = path.slots[0];
                int type;
                u64 flags;
                u64 root_id;
                u8 level;
                struct cache_extent *entry;
                struct root_item_info *rii;

                /* Past the last item of this leaf: advance to the next leaf */
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(info->extent_root, &path);
                        if (ret < 0) {
                                break;
                        } else if (ret) {
                                /* Positive result: no more leaves, normal end */
                                ret = 0;
                                break;
                        }
                        leaf = path.nodes[0];
                        slot = path.slots[0];
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

                if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
                    found_key.type != BTRFS_METADATA_ITEM_KEY)
                        goto next;

                ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
                flags = btrfs_extent_flags(leaf, ei);

                /* EXTENT_ITEMs without the tree block flag are skipped */
                if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
                    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
                        goto next;

                /*
                 * METADATA_ITEM: the level is the key offset and the inline
                 * ref follows the extent item directly.  EXTENT_ITEM: a
                 * btrfs_tree_block_info sits in between and holds the level.
                 */
                if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
                        iref = (struct btrfs_extent_inline_ref *)(ei + 1);
                        level = found_key.offset;
                } else {
                        struct btrfs_tree_block_info *binfo;

                        binfo = (struct btrfs_tree_block_info *)(ei + 1);
                        iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
                        level = btrfs_tree_block_level(leaf, binfo);
                }

                /*
                 * For a root extent, it must be of the following type and the
                 * first (and only one) iref in the item.
                 */
                type = btrfs_extent_inline_ref_type(leaf, iref);
                if (type != BTRFS_TREE_BLOCK_REF_KEY)
                        goto next;

                /* Cache entries are keyed by root id, with a range size of 1 */
                root_id = btrfs_extent_inline_ref_offset(leaf, iref);
                entry = lookup_cache_extent(roots_info_cache, root_id, 1);
                if (!entry) {
                        rii = malloc(sizeof(struct root_item_info));
                        if (!rii) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        rii->cache_extent.start = root_id;
                        rii->cache_extent.size = 1;
                        /* (u8)-1 marks "no block recorded yet" (checked below) */
                        rii->level = (u8)-1;
                        entry = &rii->cache_extent;
                        ret = insert_cache_extent(roots_info_cache, entry);
                        ASSERT(ret == 0);
                } else {
                        rii = container_of(entry, struct root_item_info,
                                           cache_extent);
                }

                ASSERT(rii->cache_extent.start == root_id);
                ASSERT(rii->cache_extent.size == 1);

                /*
                 * Keep the highest-level block seen for this root; count how
                 * many blocks were seen at that same level.
                 */
                if (level > rii->level || rii->level == (u8)-1) {
                        rii->level = level;
                        rii->bytenr = found_key.objectid;
                        rii->gen = btrfs_extent_generation(leaf, ei);
                        rii->node_count = 1;
                } else if (level == rii->level) {
                        rii->node_count++;
                }
next:
                path.slots[0]++;
        }

out:
        btrfs_release_path(&path);

        return ret;
}
12406
12407 static int maybe_repair_root_item(struct btrfs_path *path,
12408                                   const struct btrfs_key *root_key,
12409                                   const int read_only_mode)
12410 {
12411         const u64 root_id = root_key->objectid;
12412         struct cache_extent *entry;
12413         struct root_item_info *rii;
12414         struct btrfs_root_item ri;
12415         unsigned long offset;
12416
12417         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12418         if (!entry) {
12419                 fprintf(stderr,
12420                         "Error: could not find extent items for root %llu\n",
12421                         root_key->objectid);
12422                 return -ENOENT;
12423         }
12424
12425         rii = container_of(entry, struct root_item_info, cache_extent);
12426         ASSERT(rii->cache_extent.start == root_id);
12427         ASSERT(rii->cache_extent.size == 1);
12428
12429         if (rii->node_count != 1) {
12430                 fprintf(stderr,
12431                         "Error: could not find btree root extent for root %llu\n",
12432                         root_id);
12433                 return -ENOENT;
12434         }
12435
12436         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12437         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12438
12439         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12440             btrfs_root_level(&ri) != rii->level ||
12441             btrfs_root_generation(&ri) != rii->gen) {
12442
12443                 /*
12444                  * If we're in repair mode but our caller told us to not update
12445                  * the root item, i.e. just check if it needs to be updated, don't
12446                  * print this message, since the caller will call us again shortly
12447                  * for the same root item without read only mode (the caller will
12448                  * open a transaction first).
12449                  */
12450                 if (!(read_only_mode && repair))
12451                         fprintf(stderr,
12452                                 "%sroot item for root %llu,"
12453                                 " current bytenr %llu, current gen %llu, current level %u,"
12454                                 " new bytenr %llu, new gen %llu, new level %u\n",
12455                                 (read_only_mode ? "" : "fixing "),
12456                                 root_id,
12457                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12458                                 btrfs_root_level(&ri),
12459                                 rii->bytenr, rii->gen, rii->level);
12460
12461                 if (btrfs_root_generation(&ri) > rii->gen) {
12462                         fprintf(stderr,
12463                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12464                                 root_id, btrfs_root_generation(&ri), rii->gen);
12465                         return -EINVAL;
12466                 }
12467
12468                 if (!read_only_mode) {
12469                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12470                         btrfs_set_root_level(&ri, rii->level);
12471                         btrfs_set_root_generation(&ri, rii->gen);
12472                         write_extent_buffer(path->nodes[0], &ri,
12473                                             offset, sizeof(ri));
12474                 }
12475
12476                 return 1;
12477         }
12478
12479         return 0;
12480 }
12481
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detect and fix those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code. Otherwise other repair code
 * would delete or modify backrefs in the extent tree, for example, which
 * results in an inconsistent fs after repairing the root items.
 *
 * Returns a negative errno on failure, otherwise the number of root items for
 * which maybe_repair_root_item() reported a mismatch (in repair mode each one
 * is counted once, on the pass that runs inside a transaction).
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
        struct btrfs_path path;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_trans_handle *trans = NULL;
        int ret = 0;
        int bad_roots = 0;
        int need_trans = 0;

        btrfs_init_path(&path);

        ret = build_roots_info_cache(info);
        if (ret)
                goto out;

        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;

again:
        /*
         * Avoid opening and committing transactions if a leaf doesn't have
         * any root items that need to be fixed, so that we avoid rotating
         * backup roots unnecessarily.
         */
        if (need_trans) {
                trans = btrfs_start_transaction(info->tree_root, 1);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
                        goto out;
                }
        }

        /* The last argument is 1 only when a transaction is open */
        ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
                                0, trans ? 1 : 0);
        if (ret < 0)
                goto out;
        leaf = path.nodes[0];

        while (1) {
                struct btrfs_key found_key;

                /*
                 * End of the current leaf: commit any open transaction, then
                 * either stop or restart the search from the key that
                 * find_next_key() stored in 'key'.
                 */
                if (path.slots[0] >= btrfs_header_nritems(leaf)) {
                        int no_more_keys = find_next_key(&path, &key);

                        btrfs_release_path(&path);
                        if (trans) {
                                ret = btrfs_commit_transaction(trans,
                                                               info->tree_root);
                                trans = NULL;
                                if (ret < 0)
                                        goto out;
                        }
                        need_trans = 0;
                        if (no_more_keys)
                                break;
                        goto again;
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

                if (found_key.type != BTRFS_ROOT_ITEM_KEY)
                        goto next;
                if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                        goto next;

                /* Without a transaction this is a read-only check */
                ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
                if (ret < 0)
                        goto out;
                if (ret) {
                        /*
                         * Mismatch found on a read-only pass while in repair
                         * mode: restart at this key with a transaction so the
                         * item can be rewritten.  It is counted on that pass.
                         */
                        if (!trans && repair) {
                                need_trans = 1;
                                key = found_key;
                                btrfs_release_path(&path);
                                goto again;
                        }
                        bad_roots++;
                }
next:
                path.slots[0]++;
        }
        ret = 0;
out:
        free_roots_info_cache();
        btrfs_release_path(&path);
        /* A transaction still open here (error path) is committed; its result is ignored */
        if (trans)
                btrfs_commit_transaction(trans, info->tree_root);
        if (ret < 0)
                return ret;

        return bad_roots;
}
12588
12589 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12590 {
12591         struct btrfs_trans_handle *trans;
12592         struct btrfs_block_group_cache *bg_cache;
12593         u64 current = 0;
12594         int ret = 0;
12595
12596         /* Clear all free space cache inodes and its extent data */
12597         while (1) {
12598                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12599                 if (!bg_cache)
12600                         break;
12601                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12602                 if (ret < 0)
12603                         return ret;
12604                 current = bg_cache->key.objectid + bg_cache->key.offset;
12605         }
12606
12607         /* Don't forget to set cache_generation to -1 */
12608         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12609         if (IS_ERR(trans)) {
12610                 error("failed to update super block cache generation");
12611                 return PTR_ERR(trans);
12612         }
12613         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12614         btrfs_commit_transaction(trans, fs_info->tree_root);
12615
12616         return ret;
12617 }
12618
/*
 * Help text for "btrfs check".  The array is NULL-terminated and is passed to
 * usage() by cmd_check() when option parsing hits '?' or 'h'.
 * NOTE(review): the layout looks like synopsis, one-line summary, long
 * description, an empty separator string, then the option lines - confirm
 * against the usage() implementation before reordering entries.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12649
12650 int cmd_check(int argc, char **argv)
12651 {
12652         struct cache_tree root_cache;
12653         struct btrfs_root *root;
12654         struct btrfs_fs_info *info;
12655         u64 bytenr = 0;
12656         u64 subvolid = 0;
12657         u64 tree_root_bytenr = 0;
12658         u64 chunk_root_bytenr = 0;
12659         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12660         int ret;
12661         int err = 0;
12662         u64 num;
12663         int init_csum_tree = 0;
12664         int readonly = 0;
12665         int clear_space_cache = 0;
12666         int qgroup_report = 0;
12667         int qgroups_repaired = 0;
12668         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12669
12670         while(1) {
12671                 int c;
12672                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12673                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12674                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12675                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12676                 static const struct option long_options[] = {
12677                         { "super", required_argument, NULL, 's' },
12678                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12679                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12680                         { "init-csum-tree", no_argument, NULL,
12681                                 GETOPT_VAL_INIT_CSUM },
12682                         { "init-extent-tree", no_argument, NULL,
12683                                 GETOPT_VAL_INIT_EXTENT },
12684                         { "check-data-csum", no_argument, NULL,
12685                                 GETOPT_VAL_CHECK_CSUM },
12686                         { "backup", no_argument, NULL, 'b' },
12687                         { "subvol-extents", required_argument, NULL, 'E' },
12688                         { "qgroup-report", no_argument, NULL, 'Q' },
12689                         { "tree-root", required_argument, NULL, 'r' },
12690                         { "chunk-root", required_argument, NULL,
12691                                 GETOPT_VAL_CHUNK_TREE },
12692                         { "progress", no_argument, NULL, 'p' },
12693                         { "mode", required_argument, NULL,
12694                                 GETOPT_VAL_MODE },
12695                         { "clear-space-cache", required_argument, NULL,
12696                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12697                         { NULL, 0, NULL, 0}
12698                 };
12699
12700                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12701                 if (c < 0)
12702                         break;
12703                 switch(c) {
12704                         case 'a': /* ignored */ break;
12705                         case 'b':
12706                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12707                                 break;
12708                         case 's':
12709                                 num = arg_strtou64(optarg);
12710                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12711                                         error(
12712                                         "super mirror should be less than %d",
12713                                                 BTRFS_SUPER_MIRROR_MAX);
12714                                         exit(1);
12715                                 }
12716                                 bytenr = btrfs_sb_offset(((int)num));
12717                                 printf("using SB copy %llu, bytenr %llu\n", num,
12718                                        (unsigned long long)bytenr);
12719                                 break;
12720                         case 'Q':
12721                                 qgroup_report = 1;
12722                                 break;
12723                         case 'E':
12724                                 subvolid = arg_strtou64(optarg);
12725                                 break;
12726                         case 'r':
12727                                 tree_root_bytenr = arg_strtou64(optarg);
12728                                 break;
12729                         case GETOPT_VAL_CHUNK_TREE:
12730                                 chunk_root_bytenr = arg_strtou64(optarg);
12731                                 break;
12732                         case 'p':
12733                                 ctx.progress_enabled = true;
12734                                 break;
12735                         case '?':
12736                         case 'h':
12737                                 usage(cmd_check_usage);
12738                         case GETOPT_VAL_REPAIR:
12739                                 printf("enabling repair mode\n");
12740                                 repair = 1;
12741                                 ctree_flags |= OPEN_CTREE_WRITES;
12742                                 break;
12743                         case GETOPT_VAL_READONLY:
12744                                 readonly = 1;
12745                                 break;
12746                         case GETOPT_VAL_INIT_CSUM:
12747                                 printf("Creating a new CRC tree\n");
12748                                 init_csum_tree = 1;
12749                                 repair = 1;
12750                                 ctree_flags |= OPEN_CTREE_WRITES;
12751                                 break;
12752                         case GETOPT_VAL_INIT_EXTENT:
12753                                 init_extent_tree = 1;
12754                                 ctree_flags |= (OPEN_CTREE_WRITES |
12755                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12756                                 repair = 1;
12757                                 break;
12758                         case GETOPT_VAL_CHECK_CSUM:
12759                                 check_data_csum = 1;
12760                                 break;
12761                         case GETOPT_VAL_MODE:
12762                                 check_mode = parse_check_mode(optarg);
12763                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12764                                         error("unknown mode: %s", optarg);
12765                                         exit(1);
12766                                 }
12767                                 break;
12768                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12769                                 if (strcmp(optarg, "v1") == 0) {
12770                                         clear_space_cache = 1;
12771                                 } else if (strcmp(optarg, "v2") == 0) {
12772                                         clear_space_cache = 2;
12773                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12774                                 } else {
12775                                         error(
12776                 "invalid argument to --clear-space-cache, must be v1 or v2");
12777                                         exit(1);
12778                                 }
12779                                 ctree_flags |= OPEN_CTREE_WRITES;
12780                                 break;
12781                 }
12782         }
12783
12784         if (check_argc_exact(argc - optind, 1))
12785                 usage(cmd_check_usage);
12786
12787         if (ctx.progress_enabled) {
12788                 ctx.tp = TASK_NOTHING;
12789                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12790         }
12791
12792         /* This check is the only reason for --readonly to exist */
12793         if (readonly && repair) {
12794                 error("repair options are not compatible with --readonly");
12795                 exit(1);
12796         }
12797
12798         /*
12799          * Not supported yet
12800          */
12801         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12802                 error("low memory mode doesn't support repair yet");
12803                 exit(1);
12804         }
12805
12806         radix_tree_init();
12807         cache_tree_init(&root_cache);
12808
12809         if((ret = check_mounted(argv[optind])) < 0) {
12810                 error("could not check mount status: %s", strerror(-ret));
12811                 err |= !!ret;
12812                 goto err_out;
12813         } else if(ret) {
12814                 error("%s is currently mounted, aborting", argv[optind]);
12815                 ret = -EBUSY;
12816                 err |= !!ret;
12817                 goto err_out;
12818         }
12819
12820         /* only allow partial opening under repair mode */
12821         if (repair)
12822                 ctree_flags |= OPEN_CTREE_PARTIAL;
12823
12824         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12825                                   chunk_root_bytenr, ctree_flags);
12826         if (!info) {
12827                 error("cannot open file system");
12828                 ret = -EIO;
12829                 err |= !!ret;
12830                 goto err_out;
12831         }
12832
12833         global_info = info;
12834         root = info->fs_root;
12835         if (clear_space_cache == 1) {
12836                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12837                         error(
12838                 "free space cache v2 detected, use --clear-space-cache v2");
12839                         ret = 1;
12840                         goto close_out;
12841                 }
12842                 printf("Clearing free space cache\n");
12843                 ret = clear_free_space_cache(info);
12844                 if (ret) {
12845                         error("failed to clear free space cache");
12846                         ret = 1;
12847                 } else {
12848                         printf("Free space cache cleared\n");
12849                 }
12850                 goto close_out;
12851         } else if (clear_space_cache == 2) {
12852                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12853                         printf("no free space cache v2 to clear\n");
12854                         ret = 0;
12855                         goto close_out;
12856                 }
12857                 printf("Clear free space cache v2\n");
12858                 ret = btrfs_clear_free_space_tree(info);
12859                 if (ret) {
12860                         error("failed to clear free space cache v2: %d", ret);
12861                         ret = 1;
12862                 } else {
12863                         printf("free space cache v2 cleared\n");
12864                 }
12865                 goto close_out;
12866         }
12867
12868         /*
12869          * repair mode will force us to commit transaction which
12870          * will make us fail to load log tree when mounting.
12871          */
12872         if (repair && btrfs_super_log_root(info->super_copy)) {
12873                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12874                 if (!ret) {
12875                         ret = 1;
12876                         err |= !!ret;
12877                         goto close_out;
12878                 }
12879                 ret = zero_log_tree(root);
12880                 err |= !!ret;
12881                 if (ret) {
12882                         error("failed to zero log tree: %d", ret);
12883                         goto close_out;
12884                 }
12885         }
12886
12887         uuid_unparse(info->super_copy->fsid, uuidbuf);
12888         if (qgroup_report) {
12889                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12890                        uuidbuf);
12891                 ret = qgroup_verify_all(info);
12892                 err |= !!ret;
12893                 if (ret == 0)
12894                         report_qgroups(1);
12895                 goto close_out;
12896         }
12897         if (subvolid) {
12898                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12899                        subvolid, argv[optind], uuidbuf);
12900                 ret = print_extent_state(info, subvolid);
12901                 err |= !!ret;
12902                 goto close_out;
12903         }
12904         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12905
12906         if (!extent_buffer_uptodate(info->tree_root->node) ||
12907             !extent_buffer_uptodate(info->dev_root->node) ||
12908             !extent_buffer_uptodate(info->chunk_root->node)) {
12909                 error("critical roots corrupted, unable to check the filesystem");
12910                 err |= !!ret;
12911                 ret = -EIO;
12912                 goto close_out;
12913         }
12914
12915         if (init_extent_tree || init_csum_tree) {
12916                 struct btrfs_trans_handle *trans;
12917
12918                 trans = btrfs_start_transaction(info->extent_root, 0);
12919                 if (IS_ERR(trans)) {
12920                         error("error starting transaction");
12921                         ret = PTR_ERR(trans);
12922                         err |= !!ret;
12923                         goto close_out;
12924                 }
12925
12926                 if (init_extent_tree) {
12927                         printf("Creating a new extent tree\n");
12928                         ret = reinit_extent_tree(trans, info);
12929                         err |= !!ret;
12930                         if (ret)
12931                                 goto close_out;
12932                 }
12933
12934                 if (init_csum_tree) {
12935                         printf("Reinitialize checksum tree\n");
12936                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12937                         if (ret) {
12938                                 error("checksum tree initialization failed: %d",
12939                                                 ret);
12940                                 ret = -EIO;
12941                                 err |= !!ret;
12942                                 goto close_out;
12943                         }
12944
12945                         ret = fill_csum_tree(trans, info->csum_root,
12946                                              init_extent_tree);
12947                         err |= !!ret;
12948                         if (ret) {
12949                                 error("checksum tree refilling failed: %d", ret);
12950                                 return -EIO;
12951                         }
12952                 }
12953                 /*
12954                  * Ok now we commit and run the normal fsck, which will add
12955                  * extent entries for all of the items it finds.
12956                  */
12957                 ret = btrfs_commit_transaction(trans, info->extent_root);
12958                 err |= !!ret;
12959                 if (ret)
12960                         goto close_out;
12961         }
12962         if (!extent_buffer_uptodate(info->extent_root->node)) {
12963                 error("critical: extent_root, unable to check the filesystem");
12964                 ret = -EIO;
12965                 err |= !!ret;
12966                 goto close_out;
12967         }
12968         if (!extent_buffer_uptodate(info->csum_root->node)) {
12969                 error("critical: csum_root, unable to check the filesystem");
12970                 ret = -EIO;
12971                 err |= !!ret;
12972                 goto close_out;
12973         }
12974
12975         if (!ctx.progress_enabled)
12976                 fprintf(stderr, "checking extents\n");
12977         if (check_mode == CHECK_MODE_LOWMEM)
12978                 ret = check_chunks_and_extents_v2(root);
12979         else
12980                 ret = check_chunks_and_extents(root);
12981         err |= !!ret;
12982         if (ret)
12983                 error(
12984                 "errors found in extent allocation tree or chunk allocation");
12985
12986         ret = repair_root_items(info);
12987         err |= !!ret;
12988         if (ret < 0) {
12989                 error("failed to repair root items: %s", strerror(-ret));
12990                 goto close_out;
12991         }
12992         if (repair) {
12993                 fprintf(stderr, "Fixed %d roots.\n", ret);
12994                 ret = 0;
12995         } else if (ret > 0) {
12996                 fprintf(stderr,
12997                        "Found %d roots with an outdated root item.\n",
12998                        ret);
12999                 fprintf(stderr,
13000                         "Please run a filesystem check with the option --repair to fix them.\n");
13001                 ret = 1;
13002                 err |= !!ret;
13003                 goto close_out;
13004         }
13005
13006         if (!ctx.progress_enabled) {
13007                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13008                         fprintf(stderr, "checking free space tree\n");
13009                 else
13010                         fprintf(stderr, "checking free space cache\n");
13011         }
13012         ret = check_space_cache(root);
13013         err |= !!ret;
13014         if (ret) {
13015                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13016                         error("errors found in free space tree");
13017                 else
13018                         error("errors found in free space cache");
13019                 goto out;
13020         }
13021
13022         /*
13023          * We used to have to have these hole extents in between our real
13024          * extents so if we don't have this flag set we need to make sure there
13025          * are no gaps in the file extents for inodes, otherwise we can just
13026          * ignore it when this happens.
13027          */
13028         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13029         if (!ctx.progress_enabled)
13030                 fprintf(stderr, "checking fs roots\n");
13031         if (check_mode == CHECK_MODE_LOWMEM)
13032                 ret = check_fs_roots_v2(root->fs_info);
13033         else
13034                 ret = check_fs_roots(root, &root_cache);
13035         err |= !!ret;
13036         if (ret) {
13037                 error("errors found in fs roots");
13038                 goto out;
13039         }
13040
13041         fprintf(stderr, "checking csums\n");
13042         ret = check_csums(root);
13043         err |= !!ret;
13044         if (ret) {
13045                 error("errors found in csum tree");
13046                 goto out;
13047         }
13048
13049         fprintf(stderr, "checking root refs\n");
13050         /* For low memory mode, check_fs_roots_v2 handles root refs */
13051         if (check_mode != CHECK_MODE_LOWMEM) {
13052                 ret = check_root_refs(root, &root_cache);
13053                 err |= !!ret;
13054                 if (ret) {
13055                         error("errors found in root refs");
13056                         goto out;
13057                 }
13058         }
13059
13060         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13061                 struct extent_buffer *eb;
13062
13063                 eb = list_first_entry(&root->fs_info->recow_ebs,
13064                                       struct extent_buffer, recow);
13065                 list_del_init(&eb->recow);
13066                 ret = recow_extent_buffer(root, eb);
13067                 err |= !!ret;
13068                 if (ret) {
13069                         error("fails to fix transid errors");
13070                         break;
13071                 }
13072         }
13073
13074         while (!list_empty(&delete_items)) {
13075                 struct bad_item *bad;
13076
13077                 bad = list_first_entry(&delete_items, struct bad_item, list);
13078                 list_del_init(&bad->list);
13079                 if (repair) {
13080                         ret = delete_bad_item(root, bad);
13081                         err |= !!ret;
13082                 }
13083                 free(bad);
13084         }
13085
13086         if (info->quota_enabled) {
13087                 fprintf(stderr, "checking quota groups\n");
13088                 ret = qgroup_verify_all(info);
13089                 err |= !!ret;
13090                 if (ret) {
13091                         error("failed to check quota groups");
13092                         goto out;
13093                 }
13094                 report_qgroups(0);
13095                 ret = repair_qgroups(info, &qgroups_repaired);
13096                 err |= !!ret;
13097                 if (err) {
13098                         error("failed to repair quota groups");
13099                         goto out;
13100                 }
13101                 ret = 0;
13102         }
13103
13104         if (!list_empty(&root->fs_info->recow_ebs)) {
13105                 error("transid errors in file system");
13106                 ret = 1;
13107                 err |= !!ret;
13108         }
13109 out:
13110         if (found_old_backref) { /*
13111                  * there was a disk format change when mixed
13112                  * backref was in testing tree. The old format
13113                  * existed about one week.
13114                  */
13115                 printf("\n * Found old mixed backref format. "
13116                        "The old format is not supported! *"
13117                        "\n * Please mount the FS in readonly mode, "
13118                        "backup data and re-format the FS. *\n\n");
13119                 err |= 1;
13120         }
13121         printf("found %llu bytes used, ",
13122                (unsigned long long)bytes_used);
13123         if (err)
13124                 printf("error(s) found\n");
13125         else
13126                 printf("no error found\n");
13127         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13128         printf("total tree bytes: %llu\n",
13129                (unsigned long long)total_btree_bytes);
13130         printf("total fs tree bytes: %llu\n",
13131                (unsigned long long)total_fs_tree_bytes);
13132         printf("total extent tree bytes: %llu\n",
13133                (unsigned long long)total_extent_tree_bytes);
13134         printf("btree space waste bytes: %llu\n",
13135                (unsigned long long)btree_space_waste);
13136         printf("file data blocks allocated: %llu\n referenced %llu\n",
13137                 (unsigned long long)data_bytes_allocated,
13138                 (unsigned long long)data_bytes_referenced);
13139
13140         free_qgroup_counts();
13141         free_root_recs_tree(&root_cache);
13142 close_out:
13143         close_ctree(root);
13144 err_out:
13145         if (ctx.progress_enabled)
13146                 task_deinit(ctx.info);
13147
13148         return err;
13149 }