897b15873bb205d75718a28ac0877f90d9200e4c
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase shown by the progress indicator.  The first three values index
 * task_position_string[] in print_status_check(); TASK_NOTHING makes that
 * function return without printing anything.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* Context handed to the progress callback print_status_check(). */
struct task_ctx {
        int progress_enabled;
        /* Phase whose label print_status_check() prints */
        enum task_position tp;

        /* Handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state of the checker.  Every counter, flag and pointer below
 * starts out as zero/NULL and both lists start out empty.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress context, zero-initialized */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementation to run.  parse_check_mode() maps the strings
 * "lowmem", "orig" and "original" to these values and any other string to
 * CHECK_MODE_UNKNOWN.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts as CHECK_MODE_DEFAULT, i.e. the original mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common part of a back reference.  It is embedded as the `node' member of
 * struct data_backref and struct tree_backref, and linked through `list'
 * (see to_extent_backref()).
 */
struct extent_backref {
        struct list_head list;
        /* presumably tells data_backref from tree_backref -- TODO confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Back reference carrying data extent details.  The generic part is the
 * embedded `node'; to_data_backref() converts from it.  `parent' and `root'
 * share storage, so only one of them is meaningful for a given backref.
 */
struct data_backref {
        struct extent_backref node;
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
116
/*
 * Error bits, one distinct bit each so they can be OR-ed together.
 * Bit 0 is not used by this set.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to inode_record::orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
/*
 * Back reference without data extent details.  The generic part is the
 * embedded `node'; to_tree_backref() converts from it.  `parent' and `root'
 * share storage, so only one of them is meaningful for a given backref.
 */
struct tree_backref {
        struct extent_backref node;
        union {
                u64 parent;
                u64 root;
        };
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit field, so it can hold this value in addition to
 * 0 and 1.
 */
enum { FLAG_UNSET = 2 };
170
/*
 * Per-extent bookkeeping record.  It can be linked through `list' (see
 * to_extent_record()) and embeds a cache_extent as `cache'.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so that it can hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
/*
 * One name referring to an inode, linked on inode_record::backrefs.
 * The name is stored inline after the structure: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        /* REF_ERR_* bits */
        int errors;
        u64 dir;
        u64 index;
        u16 namelen;
        /* Trailing name storage, sized at allocation time */
        char name[0];
};
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * List-linked record of values describing one root item.
 * NOTE(review): the code that fills and consumes it is outside this chunk;
 * field meanings below are taken from the names only.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
232
/*
 * Backref error bits, one distinct bit each.  print_ref_error() decodes
 * every bit of this set into a human readable suffix.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * One hole [start, start + len) in a file, kept in an rb-tree ordered by
 * compare_hole().  add_file_extent_hole() merges touching or overlapping
 * holes and del_file_extent_hole() splits them.
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
252
/*
 * Everything collected about one inode.  Records are reference counted:
 * get_inode_rec() clones a record before modification when refs > 1 and
 * free_inode_rec() releases it when the count drops to zero.
 */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        /* Set to (u64)-1 when get_inode_rec() creates the record */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* Reference count, 1 for a freshly created or cloned record */
        u32 refs;
};
280
/*
 * Inode error bits stored in inode_record::errors, one distinct bit each.
 * print_inode_error() decodes every bit of this set.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
/*
 * One reference to a root, linked through `list' (see to_root_backref()).
 * Like inode_backref, it ends in inline name storage.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        /* Trailing name storage, sized at allocation time */
        char name[0];
};
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/*
 * Per-root record with a list of backrefs and an embedded cache_extent.
 * NOTE(review): the code that fills it is outside this chunk; `backrefs'
 * presumably holds struct root_backref entries -- confirm.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
324
/*
 * Cache entry that carries an opaque pointer.  get_inode_rec() uses it to
 * store a struct inode_record keyed by inode number (cache.start = ino,
 * cache.size = 1).
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
329
/*
 * Cache entry holding its own root and inode caches, referenced from
 * walk_control::nodes.
 * NOTE(review): the users are outside this chunk; `refs' is presumably a
 * reference count -- confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
337
/*
 * A start/size pair describing one block.
 * NOTE(review): users are outside this chunk; units are presumably bytes
 * -- confirm.
 */
struct block_info {
        u64 start;
        u32 size;
};
342
/*
 * State of a tree walk: a cache of shared nodes plus one shared_node slot
 * per tree level (BTRFS_MAX_LEVEL slots).
 * NOTE(review): the walker itself is outside this chunk.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
349
/*
 * List-linked record naming one item by key and root id.
 * NOTE(review): producers and consumers are outside this chunk.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
355
/*
 * List-linked bytenr/bytes pair with two counters.
 * NOTE(review): producers and consumers are outside this chunk.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
363
/*
 * Summary of a root, storable in a cache tree through `cache_extent'.
 * NOTE(review): presumably kept in roots_info_cache -- confirm.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.  Each class owns a distinct bit
 * so that classes stay distinguishable once several are OR-ed together.
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH and
 * has been moved to the first free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Progress callback run when the indicator stops: terminate the status
 * line with a newline and flush stdout.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
/*
 * Transfer the inode records held by @src_node into @dst_node.
 *
 * One reference on @src_node is dropped first.  If that was the last one,
 * the ptr_nodes are moved out of the source caches; otherwise new ptr_nodes
 * are allocated that point at the same records, whose refs are bumped.
 * The loop body runs twice: once for the root_cache pair and once for the
 * inode_cache pair.
 *
 * When the destination already holds a record for the same range, the two
 * records are merged into the destination's (modifiable) record and the
 * source reference is released.
 *
 * Finally, if @src_node had a current inode and @dst_node has none or one
 * with a lower inode number, the destination's current record is marked
 * checked and its current pointer is switched to the record for the
 * source's current inode number.
 *
 * Allocation failures trip BUG_ON().  Always returns 0.
 */
static int splice_shared_node(struct shared_node *src_node,
                              struct shared_node *dst_node)
{
        struct cache_extent *cache;
        struct ptr_node *node, *ins;
        struct cache_tree *src, *dst;
        struct inode_record *rec, *conflict;
        u64 current_ino = 0;
        int splice = 0;
        int ret;

        /* Last reference gone: entries can be moved instead of duplicated. */
        if (--src_node->refs == 0)
                splice = 1;
        if (src_node->current)
                current_ino = src_node->current->ino;

        src = &src_node->root_cache;
        dst = &dst_node->root_cache;
again:
        cache = search_cache_extent(src, 0);
        while (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                /* Advance before the entry may be removed from the tree. */
                cache = next_cache_extent(cache);

                if (splice) {
                        remove_cache_extent(src, &node->cache);
                        ins = node;
                } else {
                        ins = malloc(sizeof(*ins));
                        BUG_ON(!ins);
                        ins->cache.start = node->cache.start;
                        ins->cache.size = node->cache.size;
                        ins->data = rec;
                        rec->refs++;
                }
                ret = insert_cache_extent(dst, &ins->cache);
                if (ret == -EEXIST) {
                        /* Destination already tracks this inode: merge. */
                        conflict = get_inode_rec(dst, rec->ino, 1);
                        BUG_ON(IS_ERR(conflict));
                        merge_inode_recs(rec, conflict, dst);
                        if (rec->checked) {
                                conflict->checked = 1;
                                /*
                                 * maybe_free_inode_rec() below may free
                                 * conflict, so it must not stay current.
                                 */
                                if (dst_node->current == conflict)
                                        dst_node->current = NULL;
                        }
                        maybe_free_inode_rec(dst, conflict);
                        /* Drop the reference that ins held and ins itself. */
                        free_inode_rec(rec);
                        free(ins);
                } else {
                        BUG_ON(ret);
                }
        }

        /* Second pass: repeat for the inode caches. */
        if (src == &src_node->root_cache) {
                src = &src_node->inode_cache;
                dst = &dst_node->inode_cache;
                goto again;
        }

        /* From here on dst refers to dst_node->inode_cache. */
        if (current_ino > 0 && (!dst_node->current ||
            current_ino > dst_node->current->ino)) {
                if (dst_node->current) {
                        dst_node->current->checked = 1;
                        maybe_free_inode_rec(dst, dst_node->current);
                }
                dst_node->current = get_inode_rec(dst, current_ino, 1);
                BUG_ON(IS_ERR(dst_node->current));
        }
        return 0;
}
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
/*
 * Instantiate the cache-tree teardown helper for inode records, with
 * free_inode_ptr() as the per-entry destructor.
 * NOTE(review): presumably this expands to the free_inode_recs_tree()
 * function called elsewhere in this file -- confirm against the macro
 * definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 /*
1872  * Returns >0  Found error, not fatal, should continue
1873  * Returns <0  Fatal error, must exit the whole check
1874  * Returns 0   No errors found
1875  */
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877                                struct node_refs *nrefs, int *level, int ext_ref)
1878 {
1879         struct extent_buffer *cur = path->nodes[0];
1880         struct btrfs_key key;
1881         u64 cur_bytenr;
1882         u32 nritems;
1883         u64 first_ino = 0;
1884         int root_level = btrfs_header_level(root->node);
1885         int i;
1886         int ret = 0; /* Final return value */
1887         int err = 0; /* Positive error bitmap */
1888
1889         cur_bytenr = cur->start;
1890
1891         /* skip to first inode item or the first inode number change */
1892         nritems = btrfs_header_nritems(cur);
1893         for (i = 0; i < nritems; i++) {
1894                 btrfs_item_key_to_cpu(cur, &key, i);
1895                 if (i == 0)
1896                         first_ino = key.objectid;
1897                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898                     (first_ino && first_ino != key.objectid))
1899                         break;
1900         }
1901         if (i == nritems) {
1902                 path->slots[0] = nritems;
1903                 return 0;
1904         }
1905         path->slots[0] = i;
1906
1907 again:
1908         err |= check_inode_item(root, path, ext_ref);
1909
1910         if (err & LAST_ITEM)
1911                 goto out;
1912
1913         /* still have inode items in thie leaf */
1914         if (cur->start == cur_bytenr)
1915                 goto again;
1916
1917         /*
1918          * we have switched to another leaf, above nodes may
1919          * have changed, here walk down the path, if a node
1920          * or leaf is shared, check whether we can skip this
1921          * node or leaf.
1922          */
1923         for (i = root_level; i >= 0; i--) {
1924                 if (path->nodes[i]->start == nrefs->bytenr[i])
1925                         continue;
1926
1927                 ret = update_nodes_refs(root,
1928                                 path->nodes[i]->start,
1929                                 nrefs, i);
1930                 if (ret)
1931                         goto out;
1932
1933                 if (!nrefs->need_check[i]) {
1934                         *level += 1;
1935                         break;
1936                 }
1937         }
1938
1939         for (i = 0; i < *level; i++) {
1940                 free_extent_buffer(path->nodes[i]);
1941                 path->nodes[i] = NULL;
1942         }
1943 out:
1944         err &= ~LAST_ITEM;
1945         if (err && !ret)
1946                 ret = err;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191
2192                 if (btrfs_is_leaf(next))
2193                         status = btrfs_check_leaf(root, NULL, next);
2194                 else
2195                         status = btrfs_check_node(root, NULL, next);
2196                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197                         free_extent_buffer(next);
2198                         err = -EIO;
2199                         goto out;
2200                 }
2201
2202                 *level = *level - 1;
2203                 free_extent_buffer(path->nodes[*level]);
2204                 path->nodes[*level] = next;
2205                 path->slots[*level] = 0;
2206         }
2207 out:
2208         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209         return err;
2210 }
2211
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213                             unsigned int ext_ref);
2214
2215 /*
2216  * Returns >0  Found error, should continue
2217  * Returns <0  Fatal error, must exit the whole check
2218  * Returns 0   No errors found
2219  */
2220 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2221                              int *level, struct node_refs *nrefs, int ext_ref)
2222 {
2223         enum btrfs_tree_block_status status;
2224         u64 bytenr;
2225         u64 ptr_gen;
2226         struct extent_buffer *next;
2227         struct extent_buffer *cur;
2228         u32 blocksize;
2229         int ret;
2230
2231         WARN_ON(*level < 0);
2232         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2233
2234         ret = update_nodes_refs(root, path->nodes[*level]->start,
2235                                 nrefs, *level);
2236         if (ret < 0)
2237                 return ret;
2238
2239         while (*level >= 0) {
2240                 WARN_ON(*level < 0);
2241                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2242                 cur = path->nodes[*level];
2243
2244                 if (btrfs_header_level(cur) != *level)
2245                         WARN_ON(1);
2246
2247                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2248                         break;
2249                 /* Don't forgot to check leaf/node validation */
2250                 if (*level == 0) {
2251                         ret = btrfs_check_leaf(root, NULL, cur);
2252                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2253                                 ret = -EIO;
2254                                 break;
2255                         }
2256                         ret = process_one_leaf_v2(root, path, nrefs,
2257                                                   level, ext_ref);
2258                         break;
2259                 } else {
2260                         ret = btrfs_check_node(root, NULL, cur);
2261                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2262                                 ret = -EIO;
2263                                 break;
2264                         }
2265                 }
2266                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2267                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2268                 blocksize = root->nodesize;
2269
2270                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2271                 if (ret)
2272                         break;
2273                 if (!nrefs->need_check[*level - 1]) {
2274                         path->slots[*level]++;
2275                         continue;
2276                 }
2277
2278                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2279                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280                         free_extent_buffer(next);
2281                         reada_walk_down(root, cur, path->slots[*level]);
2282                         next = read_tree_block(root, bytenr, blocksize,
2283                                                ptr_gen);
2284                         if (!extent_buffer_uptodate(next)) {
2285                                 struct btrfs_key node_key;
2286
2287                                 btrfs_node_key_to_cpu(path->nodes[*level],
2288                                                       &node_key,
2289                                                       path->slots[*level]);
2290                                 btrfs_add_corrupt_extent_record(root->fs_info,
2291                                                 &node_key,
2292                                                 path->nodes[*level]->start,
2293                                                 root->nodesize, *level);
2294                                 ret = -EIO;
2295                                 break;
2296                         }
2297                 }
2298
2299                 ret = check_child_node(cur, path->slots[*level], next);
2300                 if (ret < 0) 
2301                         break;
2302
2303                 if (btrfs_is_leaf(next))
2304                         status = btrfs_check_leaf(root, NULL, next);
2305                 else
2306                         status = btrfs_check_node(root, NULL, next);
2307                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2308                         free_extent_buffer(next);
2309                         ret = -EIO;
2310                         break;
2311                 }
2312
2313                 *level = *level - 1;
2314                 free_extent_buffer(path->nodes[*level]);
2315                 path->nodes[*level] = next;
2316                 path->slots[*level] = 0;
2317         }
2318         return ret;
2319 }
2320
2321 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2322                         struct walk_control *wc, int *level)
2323 {
2324         int i;
2325         struct extent_buffer *leaf;
2326
2327         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2328                 leaf = path->nodes[i];
2329                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2330                         path->slots[i]++;
2331                         *level = i;
2332                         return 0;
2333                 } else {
2334                         free_extent_buffer(path->nodes[*level]);
2335                         path->nodes[*level] = NULL;
2336                         BUG_ON(*level > wc->active_node);
2337                         if (*level == wc->active_node)
2338                                 leave_shared_node(root, wc, *level);
2339                         *level = i + 1;
2340                 }
2341         }
2342         return 1;
2343 }
2344
2345 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2346                            int *level)
2347 {
2348         int i;
2349         struct extent_buffer *leaf;
2350
2351         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2352                 leaf = path->nodes[i];
2353                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2354                         path->slots[i]++;
2355                         *level = i;
2356                         return 0;
2357                 } else {
2358                         free_extent_buffer(path->nodes[*level]);
2359                         path->nodes[*level] = NULL;
2360                         *level = i + 1;
2361                 }
2362         }
2363         return 1;
2364 }
2365
2366 static int check_root_dir(struct inode_record *rec)
2367 {
2368         struct inode_backref *backref;
2369         int ret = -1;
2370
2371         if (!rec->found_inode_item || rec->errors)
2372                 goto out;
2373         if (rec->nlink != 1 || rec->found_link != 0)
2374                 goto out;
2375         if (list_empty(&rec->backrefs))
2376                 goto out;
2377         backref = to_inode_backref(rec->backrefs.next);
2378         if (!backref->found_inode_ref)
2379                 goto out;
2380         if (backref->index != 0 || backref->namelen != 2 ||
2381             memcmp(backref->name, "..", 2))
2382                 goto out;
2383         if (backref->found_dir_index || backref->found_dir_item)
2384                 goto out;
2385         ret = 0;
2386 out:
2387         return ret;
2388 }
2389
2390 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2391                               struct btrfs_root *root, struct btrfs_path *path,
2392                               struct inode_record *rec)
2393 {
2394         struct btrfs_inode_item *ei;
2395         struct btrfs_key key;
2396         int ret;
2397
2398         key.objectid = rec->ino;
2399         key.type = BTRFS_INODE_ITEM_KEY;
2400         key.offset = (u64)-1;
2401
2402         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2403         if (ret < 0)
2404                 goto out;
2405         if (ret) {
2406                 if (!path->slots[0]) {
2407                         ret = -ENOENT;
2408                         goto out;
2409                 }
2410                 path->slots[0]--;
2411                 ret = 0;
2412         }
2413         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2414         if (key.objectid != rec->ino) {
2415                 ret = -ENOENT;
2416                 goto out;
2417         }
2418
2419         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2420                             struct btrfs_inode_item);
2421         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2422         btrfs_mark_buffer_dirty(path->nodes[0]);
2423         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2424         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2425                root->root_key.objectid);
2426 out:
2427         btrfs_release_path(path);
2428         return ret;
2429 }
2430
2431 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2432                                     struct btrfs_root *root,
2433                                     struct btrfs_path *path,
2434                                     struct inode_record *rec)
2435 {
2436         int ret;
2437
2438         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2439         btrfs_release_path(path);
2440         if (!ret)
2441                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2442         return ret;
2443 }
2444
2445 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2446                                struct btrfs_root *root,
2447                                struct btrfs_path *path,
2448                                struct inode_record *rec)
2449 {
2450         struct btrfs_inode_item *ei;
2451         struct btrfs_key key;
2452         int ret = 0;
2453
2454         key.objectid = rec->ino;
2455         key.type = BTRFS_INODE_ITEM_KEY;
2456         key.offset = 0;
2457
2458         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2459         if (ret) {
2460                 if (ret > 0)
2461                         ret = -ENOENT;
2462                 goto out;
2463         }
2464
2465         /* Since ret == 0, no need to check anything */
2466         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2467                             struct btrfs_inode_item);
2468         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2469         btrfs_mark_buffer_dirty(path->nodes[0]);
2470         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2471         printf("reset nbytes for ino %llu root %llu\n",
2472                rec->ino, root->root_key.objectid);
2473 out:
2474         btrfs_release_path(path);
2475         return ret;
2476 }
2477
2478 static int add_missing_dir_index(struct btrfs_root *root,
2479                                  struct cache_tree *inode_cache,
2480                                  struct inode_record *rec,
2481                                  struct inode_backref *backref)
2482 {
2483         struct btrfs_path path;
2484         struct btrfs_trans_handle *trans;
2485         struct btrfs_dir_item *dir_item;
2486         struct extent_buffer *leaf;
2487         struct btrfs_key key;
2488         struct btrfs_disk_key disk_key;
2489         struct inode_record *dir_rec;
2490         unsigned long name_ptr;
2491         u32 data_size = sizeof(*dir_item) + backref->namelen;
2492         int ret;
2493
2494         trans = btrfs_start_transaction(root, 1);
2495         if (IS_ERR(trans))
2496                 return PTR_ERR(trans);
2497
2498         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2499                 (unsigned long long)rec->ino);
2500
2501         btrfs_init_path(&path);
2502         key.objectid = backref->dir;
2503         key.type = BTRFS_DIR_INDEX_KEY;
2504         key.offset = backref->index;
2505         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2506         BUG_ON(ret);
2507
2508         leaf = path.nodes[0];
2509         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2510
2511         disk_key.objectid = cpu_to_le64(rec->ino);
2512         disk_key.type = BTRFS_INODE_ITEM_KEY;
2513         disk_key.offset = 0;
2514
2515         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2516         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2517         btrfs_set_dir_data_len(leaf, dir_item, 0);
2518         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2519         name_ptr = (unsigned long)(dir_item + 1);
2520         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2521         btrfs_mark_buffer_dirty(leaf);
2522         btrfs_release_path(&path);
2523         btrfs_commit_transaction(trans, root);
2524
2525         backref->found_dir_index = 1;
2526         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2527         BUG_ON(IS_ERR(dir_rec));
2528         if (!dir_rec)
2529                 return 0;
2530         dir_rec->found_size += backref->namelen;
2531         if (dir_rec->found_size == dir_rec->isize &&
2532             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2533                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2534         if (dir_rec->found_size != dir_rec->isize)
2535                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2536
2537         return 0;
2538 }
2539
2540 static int delete_dir_index(struct btrfs_root *root,
2541                             struct inode_backref *backref)
2542 {
2543         struct btrfs_trans_handle *trans;
2544         struct btrfs_dir_item *di;
2545         struct btrfs_path path;
2546         int ret = 0;
2547
2548         trans = btrfs_start_transaction(root, 1);
2549         if (IS_ERR(trans))
2550                 return PTR_ERR(trans);
2551
2552         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2553                 (unsigned long long)backref->dir,
2554                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2555                 (unsigned long long)root->objectid);
2556
2557         btrfs_init_path(&path);
2558         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2559                                     backref->name, backref->namelen,
2560                                     backref->index, -1);
2561         if (IS_ERR(di)) {
2562                 ret = PTR_ERR(di);
2563                 btrfs_release_path(&path);
2564                 btrfs_commit_transaction(trans, root);
2565                 if (ret == -ENOENT)
2566                         return 0;
2567                 return ret;
2568         }
2569
2570         if (!di)
2571                 ret = btrfs_del_item(trans, root, &path);
2572         else
2573                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2574         BUG_ON(ret);
2575         btrfs_release_path(&path);
2576         btrfs_commit_transaction(trans, root);
2577         return ret;
2578 }
2579
2580 static int create_inode_item(struct btrfs_root *root,
2581                              struct inode_record *rec,
2582                              int root_dir)
2583 {
2584         struct btrfs_trans_handle *trans;
2585         struct btrfs_inode_item inode_item;
2586         time_t now = time(NULL);
2587         int ret;
2588
2589         trans = btrfs_start_transaction(root, 1);
2590         if (IS_ERR(trans)) {
2591                 ret = PTR_ERR(trans);
2592                 return ret;
2593         }
2594
2595         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2596                 "be incomplete, please check permissions and content after "
2597                 "the fsck completes.\n", (unsigned long long)root->objectid,
2598                 (unsigned long long)rec->ino);
2599
2600         memset(&inode_item, 0, sizeof(inode_item));
2601         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2602         if (root_dir)
2603                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2604         else
2605                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2606         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2607         if (rec->found_dir_item) {
2608                 if (rec->found_file_extent)
2609                         fprintf(stderr, "root %llu inode %llu has both a dir "
2610                                 "item and extents, unsure if it is a dir or a "
2611                                 "regular file so setting it as a directory\n",
2612                                 (unsigned long long)root->objectid,
2613                                 (unsigned long long)rec->ino);
2614                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2615                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2616         } else if (!rec->found_dir_item) {
2617                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2618                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2619         }
2620         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2621         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2622         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2623         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2624         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2625         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2626         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2627         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2628
2629         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2630         BUG_ON(ret);
2631         btrfs_commit_transaction(trans, root);
2632         return 0;
2633 }
2634
2635 static int repair_inode_backrefs(struct btrfs_root *root,
2636                                  struct inode_record *rec,
2637                                  struct cache_tree *inode_cache,
2638                                  int delete)
2639 {
2640         struct inode_backref *tmp, *backref;
2641         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2642         int ret = 0;
2643         int repaired = 0;
2644
2645         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2646                 if (!delete && rec->ino == root_dirid) {
2647                         if (!rec->found_inode_item) {
2648                                 ret = create_inode_item(root, rec, 1);
2649                                 if (ret)
2650                                         break;
2651                                 repaired++;
2652                         }
2653                 }
2654
2655                 /* Index 0 for root dir's are special, don't mess with it */
2656                 if (rec->ino == root_dirid && backref->index == 0)
2657                         continue;
2658
2659                 if (delete &&
2660                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2661                      (backref->found_dir_index && backref->found_inode_ref &&
2662                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2663                         ret = delete_dir_index(root, backref);
2664                         if (ret)
2665                                 break;
2666                         repaired++;
2667                         list_del(&backref->list);
2668                         free(backref);
2669                 }
2670
2671                 if (!delete && !backref->found_dir_index &&
2672                     backref->found_dir_item && backref->found_inode_ref) {
2673                         ret = add_missing_dir_index(root, inode_cache, rec,
2674                                                     backref);
2675                         if (ret)
2676                                 break;
2677                         repaired++;
2678                         if (backref->found_dir_item &&
2679                             backref->found_dir_index &&
2680                             backref->found_dir_index) {
2681                                 if (!backref->errors &&
2682                                     backref->found_inode_ref) {
2683                                         list_del(&backref->list);
2684                                         free(backref);
2685                                 }
2686                         }
2687                 }
2688
2689                 if (!delete && (!backref->found_dir_index &&
2690                                 !backref->found_dir_item &&
2691                                 backref->found_inode_ref)) {
2692                         struct btrfs_trans_handle *trans;
2693                         struct btrfs_key location;
2694
2695                         ret = check_dir_conflict(root, backref->name,
2696                                                  backref->namelen,
2697                                                  backref->dir,
2698                                                  backref->index);
2699                         if (ret) {
2700                                 /*
2701                                  * let nlink fixing routine to handle it,
2702                                  * which can do it better.
2703                                  */
2704                                 ret = 0;
2705                                 break;
2706                         }
2707                         location.objectid = rec->ino;
2708                         location.type = BTRFS_INODE_ITEM_KEY;
2709                         location.offset = 0;
2710
2711                         trans = btrfs_start_transaction(root, 1);
2712                         if (IS_ERR(trans)) {
2713                                 ret = PTR_ERR(trans);
2714                                 break;
2715                         }
2716                         fprintf(stderr, "adding missing dir index/item pair "
2717                                 "for inode %llu\n",
2718                                 (unsigned long long)rec->ino);
2719                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2720                                                     backref->namelen,
2721                                                     backref->dir, &location,
2722                                                     imode_to_type(rec->imode),
2723                                                     backref->index);
2724                         BUG_ON(ret);
2725                         btrfs_commit_transaction(trans, root);
2726                         repaired++;
2727                 }
2728
2729                 if (!delete && (backref->found_inode_ref &&
2730                                 backref->found_dir_index &&
2731                                 backref->found_dir_item &&
2732                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2733                                 !rec->found_inode_item)) {
2734                         ret = create_inode_item(root, rec, 0);
2735                         if (ret)
2736                                 break;
2737                         repaired++;
2738                 }
2739
2740         }
2741         return ret ? ret : repaired;
2742 }
2743
2744 /*
2745  * To determine the file type for nlink/inode_item repair
2746  *
2747  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2748  * Return -ENOENT if file type is not found.
2749  */
2750 static int find_file_type(struct inode_record *rec, u8 *type)
2751 {
2752         struct inode_backref *backref;
2753
2754         /* For inode item recovered case */
2755         if (rec->found_inode_item) {
2756                 *type = imode_to_type(rec->imode);
2757                 return 0;
2758         }
2759
2760         list_for_each_entry(backref, &rec->backrefs, list) {
2761                 if (backref->found_dir_index || backref->found_dir_item) {
2762                         *type = backref->filetype;
2763                         return 0;
2764                 }
2765         }
2766         return -ENOENT;
2767 }
2768
2769 /*
2770  * To determine the file name for nlink repair
2771  *
2772  * Return 0 if file name is found, set name and namelen.
2773  * Return -ENOENT if file name is not found.
2774  */
2775 static int find_file_name(struct inode_record *rec,
2776                           char *name, int *namelen)
2777 {
2778         struct inode_backref *backref;
2779
2780         list_for_each_entry(backref, &rec->backrefs, list) {
2781                 if (backref->found_dir_index || backref->found_dir_item ||
2782                     backref->found_inode_ref) {
2783                         memcpy(name, backref->name, backref->namelen);
2784                         *namelen = backref->namelen;
2785                         return 0;
2786                 }
2787         }
2788         return -ENOENT;
2789 }
2790
2791 /* Reset the nlink of the inode to the correct one */
2792 static int reset_nlink(struct btrfs_trans_handle *trans,
2793                        struct btrfs_root *root,
2794                        struct btrfs_path *path,
2795                        struct inode_record *rec)
2796 {
2797         struct inode_backref *backref;
2798         struct inode_backref *tmp;
2799         struct btrfs_key key;
2800         struct btrfs_inode_item *inode_item;
2801         int ret = 0;
2802
2803         /* We don't believe this either, reset it and iterate backref */
2804         rec->found_link = 0;
2805
2806         /* Remove all backref including the valid ones */
2807         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2808                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2809                                    backref->index, backref->name,
2810                                    backref->namelen, 0);
2811                 if (ret < 0)
2812                         goto out;
2813
2814                 /* remove invalid backref, so it won't be added back */
2815                 if (!(backref->found_dir_index &&
2816                       backref->found_dir_item &&
2817                       backref->found_inode_ref)) {
2818                         list_del(&backref->list);
2819                         free(backref);
2820                 } else {
2821                         rec->found_link++;
2822                 }
2823         }
2824
2825         /* Set nlink to 0 */
2826         key.objectid = rec->ino;
2827         key.type = BTRFS_INODE_ITEM_KEY;
2828         key.offset = 0;
2829         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2830         if (ret < 0)
2831                 goto out;
2832         if (ret > 0) {
2833                 ret = -ENOENT;
2834                 goto out;
2835         }
2836         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2837                                     struct btrfs_inode_item);
2838         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2839         btrfs_mark_buffer_dirty(path->nodes[0]);
2840         btrfs_release_path(path);
2841
2842         /*
2843          * Add back valid inode_ref/dir_item/dir_index,
2844          * add_link() will handle the nlink inc, so new nlink must be correct
2845          */
2846         list_for_each_entry(backref, &rec->backrefs, list) {
2847                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2848                                      backref->name, backref->namelen,
2849                                      backref->filetype, &backref->index, 1);
2850                 if (ret < 0)
2851                         goto out;
2852         }
2853 out:
2854         btrfs_release_path(path);
2855         return ret;
2856 }
2857
2858 static int get_highest_inode(struct btrfs_trans_handle *trans,
2859                                 struct btrfs_root *root,
2860                                 struct btrfs_path *path,
2861                                 u64 *highest_ino)
2862 {
2863         struct btrfs_key key, found_key;
2864         int ret;
2865
2866         btrfs_init_path(path);
2867         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2868         key.offset = -1;
2869         key.type = BTRFS_INODE_ITEM_KEY;
2870         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2871         if (ret == 1) {
2872                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2873                                 path->slots[0] - 1);
2874                 *highest_ino = found_key.objectid;
2875                 ret = 0;
2876         }
2877         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2878                 ret = -EOVERFLOW;
2879         btrfs_release_path(path);
2880         return ret;
2881 }
2882
2883 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2884                                struct btrfs_root *root,
2885                                struct btrfs_path *path,
2886                                struct inode_record *rec)
2887 {
2888         char *dir_name = "lost+found";
2889         char namebuf[BTRFS_NAME_LEN] = {0};
2890         u64 lost_found_ino;
2891         u32 mode = 0700;
2892         u8 type = 0;
2893         int namelen = 0;
2894         int name_recovered = 0;
2895         int type_recovered = 0;
2896         int ret = 0;
2897
2898         /*
2899          * Get file name and type first before these invalid inode ref
2900          * are deleted by remove_all_invalid_backref()
2901          */
2902         name_recovered = !find_file_name(rec, namebuf, &namelen);
2903         type_recovered = !find_file_type(rec, &type);
2904
2905         if (!name_recovered) {
2906                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2907                        rec->ino, rec->ino);
2908                 namelen = count_digits(rec->ino);
2909                 sprintf(namebuf, "%llu", rec->ino);
2910                 name_recovered = 1;
2911         }
2912         if (!type_recovered) {
2913                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2914                        rec->ino);
2915                 type = BTRFS_FT_REG_FILE;
2916                 type_recovered = 1;
2917         }
2918
2919         ret = reset_nlink(trans, root, path, rec);
2920         if (ret < 0) {
2921                 fprintf(stderr,
2922                         "Failed to reset nlink for inode %llu: %s\n",
2923                         rec->ino, strerror(-ret));
2924                 goto out;
2925         }
2926
2927         if (rec->found_link == 0) {
2928                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2929                 if (ret < 0)
2930                         goto out;
2931                 lost_found_ino++;
2932                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2933                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2934                                   mode);
2935                 if (ret < 0) {
2936                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2937                                 dir_name, strerror(-ret));
2938                         goto out;
2939                 }
2940                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2941                                      namebuf, namelen, type, NULL, 1);
2942                 /*
2943                  * Add ".INO" suffix several times to handle case where
2944                  * "FILENAME.INO" is already taken by another file.
2945                  */
2946                 while (ret == -EEXIST) {
2947                         /*
2948                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2949                          */
2950                         if (namelen + count_digits(rec->ino) + 1 >
2951                             BTRFS_NAME_LEN) {
2952                                 ret = -EFBIG;
2953                                 goto out;
2954                         }
2955                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2956                                  ".%llu", rec->ino);
2957                         namelen += count_digits(rec->ino) + 1;
2958                         ret = btrfs_add_link(trans, root, rec->ino,
2959                                              lost_found_ino, namebuf,
2960                                              namelen, type, NULL, 1);
2961                 }
2962                 if (ret < 0) {
2963                         fprintf(stderr,
2964                                 "Failed to link the inode %llu to %s dir: %s\n",
2965                                 rec->ino, dir_name, strerror(-ret));
2966                         goto out;
2967                 }
2968                 /*
2969                  * Just increase the found_link, don't actually add the
2970                  * backref. This will make things easier and this inode
2971                  * record will be freed after the repair is done.
2972                  * So fsck will not report problem about this inode.
2973                  */
2974                 rec->found_link++;
2975                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2976                        namelen, namebuf, dir_name);
2977         }
2978         printf("Fixed the nlink of inode %llu\n", rec->ino);
2979 out:
2980         /*
2981          * Clear the flag anyway, or we will loop forever for the same inode
2982          * as it will not be removed from the bad inode list and the dead loop
2983          * happens.
2984          */
2985         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2986         btrfs_release_path(path);
2987         return ret;
2988 }
2989
2990 /*
2991  * Check if there is any normal(reg or prealloc) file extent for given
2992  * ino.
2993  * This is used to determine the file type when neither its dir_index/item or
2994  * inode_item exists.
2995  *
2996  * This will *NOT* report error, if any error happens, just consider it does
2997  * not have any normal file extent.
2998  */
2999 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3000 {
3001         struct btrfs_path path;
3002         struct btrfs_key key;
3003         struct btrfs_key found_key;
3004         struct btrfs_file_extent_item *fi;
3005         u8 type;
3006         int ret = 0;
3007
3008         btrfs_init_path(&path);
3009         key.objectid = ino;
3010         key.type = BTRFS_EXTENT_DATA_KEY;
3011         key.offset = 0;
3012
3013         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3014         if (ret < 0) {
3015                 ret = 0;
3016                 goto out;
3017         }
3018         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3019                 ret = btrfs_next_leaf(root, &path);
3020                 if (ret) {
3021                         ret = 0;
3022                         goto out;
3023                 }
3024         }
3025         while (1) {
3026                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3027                                       path.slots[0]);
3028                 if (found_key.objectid != ino ||
3029                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3030                         break;
3031                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3032                                     struct btrfs_file_extent_item);
3033                 type = btrfs_file_extent_type(path.nodes[0], fi);
3034                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3035                         ret = 1;
3036                         goto out;
3037                 }
3038         }
3039 out:
3040         btrfs_release_path(&path);
3041         return ret;
3042 }
3043
3044 static u32 btrfs_type_to_imode(u8 type)
3045 {
3046         static u32 imode_by_btrfs_type[] = {
3047                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3048                 [BTRFS_FT_DIR]          = S_IFDIR,
3049                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3050                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3051                 [BTRFS_FT_FIFO]         = S_IFIFO,
3052                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3053                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3054         };
3055
3056         return imode_by_btrfs_type[(type)];
3057 }
3058
3059 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3060                                 struct btrfs_root *root,
3061                                 struct btrfs_path *path,
3062                                 struct inode_record *rec)
3063 {
3064         u8 filetype;
3065         u32 mode = 0700;
3066         int type_recovered = 0;
3067         int ret = 0;
3068
3069         printf("Trying to rebuild inode:%llu\n", rec->ino);
3070
3071         type_recovered = !find_file_type(rec, &filetype);
3072
3073         /*
3074          * Try to determine inode type if type not found.
3075          *
3076          * For found regular file extent, it must be FILE.
3077          * For found dir_item/index, it must be DIR.
3078          *
3079          * For undetermined one, use FILE as fallback.
3080          *
3081          * TODO:
3082          * 1. If found backref(inode_index/item is already handled) to it,
3083          *    it must be DIR.
3084          *    Need new inode-inode ref structure to allow search for that.
3085          */
3086         if (!type_recovered) {
3087                 if (rec->found_file_extent &&
3088                     find_normal_file_extent(root, rec->ino)) {
3089                         type_recovered = 1;
3090                         filetype = BTRFS_FT_REG_FILE;
3091                 } else if (rec->found_dir_item) {
3092                         type_recovered = 1;
3093                         filetype = BTRFS_FT_DIR;
3094                 } else if (!list_empty(&rec->orphan_extents)) {
3095                         type_recovered = 1;
3096                         filetype = BTRFS_FT_REG_FILE;
3097                 } else{
3098                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3099                                rec->ino);
3100                         type_recovered = 1;
3101                         filetype = BTRFS_FT_REG_FILE;
3102                 }
3103         }
3104
3105         ret = btrfs_new_inode(trans, root, rec->ino,
3106                               mode | btrfs_type_to_imode(filetype));
3107         if (ret < 0)
3108                 goto out;
3109
3110         /*
3111          * Here inode rebuild is done, we only rebuild the inode item,
3112          * don't repair the nlink(like move to lost+found).
3113          * That is the job of nlink repair.
3114          *
3115          * We just fill the record and return
3116          */
3117         rec->found_dir_item = 1;
3118         rec->imode = mode | btrfs_type_to_imode(filetype);
3119         rec->nlink = 0;
3120         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3121         /* Ensure the inode_nlinks repair function will be called */
3122         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3123 out:
3124         return ret;
3125 }
3126
3127 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3128                                       struct btrfs_root *root,
3129                                       struct btrfs_path *path,
3130                                       struct inode_record *rec)
3131 {
3132         struct orphan_data_extent *orphan;
3133         struct orphan_data_extent *tmp;
3134         int ret = 0;
3135
3136         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3137                 /*
3138                  * Check for conflicting file extents
3139                  *
3140                  * Here we don't know whether the extents is compressed or not,
3141                  * so we can only assume it not compressed nor data offset,
3142                  * and use its disk_len as extent length.
3143                  */
3144                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3145                                        orphan->offset, orphan->disk_len, 0);
3146                 btrfs_release_path(path);
3147                 if (ret < 0)
3148                         goto out;
3149                 if (!ret) {
3150                         fprintf(stderr,
3151                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3152                                 orphan->disk_bytenr, orphan->disk_len);
3153                         ret = btrfs_free_extent(trans,
3154                                         root->fs_info->extent_root,
3155                                         orphan->disk_bytenr, orphan->disk_len,
3156                                         0, root->objectid, orphan->objectid,
3157                                         orphan->offset);
3158                         if (ret < 0)
3159                                 goto out;
3160                 }
3161                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3162                                 orphan->offset, orphan->disk_bytenr,
3163                                 orphan->disk_len, orphan->disk_len);
3164                 if (ret < 0)
3165                         goto out;
3166
3167                 /* Update file size info */
3168                 rec->found_size += orphan->disk_len;
3169                 if (rec->found_size == rec->nbytes)
3170                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3171
3172                 /* Update the file extent hole info too */
3173                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3174                                            orphan->disk_len);
3175                 if (ret < 0)
3176                         goto out;
3177                 if (RB_EMPTY_ROOT(&rec->holes))
3178                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3179
3180                 list_del(&orphan->list);
3181                 free(orphan);
3182         }
3183         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3184 out:
3185         return ret;
3186 }
3187
3188 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3189                                         struct btrfs_root *root,
3190                                         struct btrfs_path *path,
3191                                         struct inode_record *rec)
3192 {
3193         struct rb_node *node;
3194         struct file_extent_hole *hole;
3195         int found = 0;
3196         int ret = 0;
3197
3198         node = rb_first(&rec->holes);
3199
3200         while (node) {
3201                 found = 1;
3202                 hole = rb_entry(node, struct file_extent_hole, node);
3203                 ret = btrfs_punch_hole(trans, root, rec->ino,
3204                                        hole->start, hole->len);
3205                 if (ret < 0)
3206                         goto out;
3207                 ret = del_file_extent_hole(&rec->holes, hole->start,
3208                                            hole->len);
3209                 if (ret < 0)
3210                         goto out;
3211                 if (RB_EMPTY_ROOT(&rec->holes))
3212                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3213                 node = rb_first(&rec->holes);
3214         }
3215         /* special case for a file losing all its file extent */
3216         if (!found) {
3217                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3218                                        round_up(rec->isize, root->sectorsize));
3219                 if (ret < 0)
3220                         goto out;
3221         }
3222         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3223                rec->ino, root->objectid);
3224 out:
3225         return ret;
3226 }
3227
3228 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3229 {
3230         struct btrfs_trans_handle *trans;
3231         struct btrfs_path path;
3232         int ret = 0;
3233
3234         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3235                              I_ERR_NO_ORPHAN_ITEM |
3236                              I_ERR_LINK_COUNT_WRONG |
3237                              I_ERR_NO_INODE_ITEM |
3238                              I_ERR_FILE_EXTENT_ORPHAN |
3239                              I_ERR_FILE_EXTENT_DISCOUNT|
3240                              I_ERR_FILE_NBYTES_WRONG)))
3241                 return rec->errors;
3242
3243         /*
3244          * For nlink repair, it may create a dir and add link, so
3245          * 2 for parent(256)'s dir_index and dir_item
3246          * 2 for lost+found dir's inode_item and inode_ref
3247          * 1 for the new inode_ref of the file
3248          * 2 for lost+found dir's dir_index and dir_item for the file
3249          */
3250         trans = btrfs_start_transaction(root, 7);
3251         if (IS_ERR(trans))
3252                 return PTR_ERR(trans);
3253
3254         btrfs_init_path(&path);
3255         if (rec->errors & I_ERR_NO_INODE_ITEM)
3256                 ret = repair_inode_no_item(trans, root, &path, rec);
3257         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3258                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3259         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3260                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3261         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3262                 ret = repair_inode_isize(trans, root, &path, rec);
3263         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3264                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3265         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3266                 ret = repair_inode_nlinks(trans, root, &path, rec);
3267         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3268                 ret = repair_inode_nbytes(trans, root, &path, rec);
3269         btrfs_commit_transaction(trans, root);
3270         btrfs_release_path(&path);
3271         return ret;
3272 }
3273
3274 static int check_inode_recs(struct btrfs_root *root,
3275                             struct cache_tree *inode_cache)
3276 {
3277         struct cache_extent *cache;
3278         struct ptr_node *node;
3279         struct inode_record *rec;
3280         struct inode_backref *backref;
3281         int stage = 0;
3282         int ret = 0;
3283         int err = 0;
3284         u64 error = 0;
3285         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3286
3287         if (btrfs_root_refs(&root->root_item) == 0) {
3288                 if (!cache_tree_empty(inode_cache))
3289                         fprintf(stderr, "warning line %d\n", __LINE__);
3290                 return 0;
3291         }
3292
3293         /*
3294          * We need to repair backrefs first because we could change some of the
3295          * errors in the inode recs.
3296          *
3297          * We also need to go through and delete invalid backrefs first and then
3298          * add the correct ones second.  We do this because we may get EEXIST
3299          * when adding back the correct index because we hadn't yet deleted the
3300          * invalid index.
3301          *
3302          * For example, if we were missing a dir index then the directories
3303          * isize would be wrong, so if we fixed the isize to what we thought it
3304          * would be and then fixed the backref we'd still have a invalid fs, so
3305          * we need to add back the dir index and then check to see if the isize
3306          * is still wrong.
3307          */
3308         while (stage < 3) {
3309                 stage++;
3310                 if (stage == 3 && !err)
3311                         break;
3312
3313                 cache = search_cache_extent(inode_cache, 0);
3314                 while (repair && cache) {
3315                         node = container_of(cache, struct ptr_node, cache);
3316                         rec = node->data;
3317                         cache = next_cache_extent(cache);
3318
3319                         /* Need to free everything up and rescan */
3320                         if (stage == 3) {
3321                                 remove_cache_extent(inode_cache, &node->cache);
3322                                 free(node);
3323                                 free_inode_rec(rec);
3324                                 continue;
3325                         }
3326
3327                         if (list_empty(&rec->backrefs))
3328                                 continue;
3329
3330                         ret = repair_inode_backrefs(root, rec, inode_cache,
3331                                                     stage == 1);
3332                         if (ret < 0) {
3333                                 err = ret;
3334                                 stage = 2;
3335                                 break;
3336                         } if (ret > 0) {
3337                                 err = -EAGAIN;
3338                         }
3339                 }
3340         }
3341         if (err)
3342                 return err;
3343
3344         rec = get_inode_rec(inode_cache, root_dirid, 0);
3345         BUG_ON(IS_ERR(rec));
3346         if (rec) {
3347                 ret = check_root_dir(rec);
3348                 if (ret) {
3349                         fprintf(stderr, "root %llu root dir %llu error\n",
3350                                 (unsigned long long)root->root_key.objectid,
3351                                 (unsigned long long)root_dirid);
3352                         print_inode_error(root, rec);
3353                         error++;
3354                 }
3355         } else {
3356                 if (repair) {
3357                         struct btrfs_trans_handle *trans;
3358
3359                         trans = btrfs_start_transaction(root, 1);
3360                         if (IS_ERR(trans)) {
3361                                 err = PTR_ERR(trans);
3362                                 return err;
3363                         }
3364
3365                         fprintf(stderr,
3366                                 "root %llu missing its root dir, recreating\n",
3367                                 (unsigned long long)root->objectid);
3368
3369                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3370                         BUG_ON(ret);
3371
3372                         btrfs_commit_transaction(trans, root);
3373                         return -EAGAIN;
3374                 }
3375
3376                 fprintf(stderr, "root %llu root dir %llu not found\n",
3377                         (unsigned long long)root->root_key.objectid,
3378                         (unsigned long long)root_dirid);
3379         }
3380
3381         while (1) {
3382                 cache = search_cache_extent(inode_cache, 0);
3383                 if (!cache)
3384                         break;
3385                 node = container_of(cache, struct ptr_node, cache);
3386                 rec = node->data;
3387                 remove_cache_extent(inode_cache, &node->cache);
3388                 free(node);
3389                 if (rec->ino == root_dirid ||
3390                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3391                         free_inode_rec(rec);
3392                         continue;
3393                 }
3394
3395                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3396                         ret = check_orphan_item(root, rec->ino);
3397                         if (ret == 0)
3398                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3399                         if (can_free_inode_rec(rec)) {
3400                                 free_inode_rec(rec);
3401                                 continue;
3402                         }
3403                 }
3404
3405                 if (!rec->found_inode_item)
3406                         rec->errors |= I_ERR_NO_INODE_ITEM;
3407                 if (rec->found_link != rec->nlink)
3408                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3409                 if (repair) {
3410                         ret = try_repair_inode(root, rec);
3411                         if (ret == 0 && can_free_inode_rec(rec)) {
3412                                 free_inode_rec(rec);
3413                                 continue;
3414                         }
3415                         ret = 0;
3416                 }
3417
3418                 if (!(repair && ret == 0))
3419                         error++;
3420                 print_inode_error(root, rec);
3421                 list_for_each_entry(backref, &rec->backrefs, list) {
3422                         if (!backref->found_dir_item)
3423                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3424                         if (!backref->found_dir_index)
3425                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3426                         if (!backref->found_inode_ref)
3427                                 backref->errors |= REF_ERR_NO_INODE_REF;
3428                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3429                                 " namelen %u name %s filetype %d errors %x",
3430                                 (unsigned long long)backref->dir,
3431                                 (unsigned long long)backref->index,
3432                                 backref->namelen, backref->name,
3433                                 backref->filetype, backref->errors);
3434                         print_ref_error(backref->errors);
3435                 }
3436                 free_inode_rec(rec);
3437         }
3438         return (error > 0) ? -1 : 0;
3439 }
3440
3441 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3442                                         u64 objectid)
3443 {
3444         struct cache_extent *cache;
3445         struct root_record *rec = NULL;
3446         int ret;
3447
3448         cache = lookup_cache_extent(root_cache, objectid, 1);
3449         if (cache) {
3450                 rec = container_of(cache, struct root_record, cache);
3451         } else {
3452                 rec = calloc(1, sizeof(*rec));
3453                 if (!rec)
3454                         return ERR_PTR(-ENOMEM);
3455                 rec->objectid = objectid;
3456                 INIT_LIST_HEAD(&rec->backrefs);
3457                 rec->cache.start = objectid;
3458                 rec->cache.size = 1;
3459
3460                 ret = insert_cache_extent(root_cache, &rec->cache);
3461                 if (ret)
3462                         return ERR_PTR(-EEXIST);
3463         }
3464         return rec;
3465 }
3466
3467 static struct root_backref *get_root_backref(struct root_record *rec,
3468                                              u64 ref_root, u64 dir, u64 index,
3469                                              const char *name, int namelen)
3470 {
3471         struct root_backref *backref;
3472
3473         list_for_each_entry(backref, &rec->backrefs, list) {
3474                 if (backref->ref_root != ref_root || backref->dir != dir ||
3475                     backref->namelen != namelen)
3476                         continue;
3477                 if (memcmp(name, backref->name, namelen))
3478                         continue;
3479                 return backref;
3480         }
3481
3482         backref = calloc(1, sizeof(*backref) + namelen + 1);
3483         if (!backref)
3484                 return NULL;
3485         backref->ref_root = ref_root;
3486         backref->dir = dir;
3487         backref->index = index;
3488         backref->namelen = namelen;
3489         memcpy(backref->name, name, namelen);
3490         backref->name[namelen] = '\0';
3491         list_add_tail(&backref->list, &rec->backrefs);
3492         return backref;
3493 }
3494
3495 static void free_root_record(struct cache_extent *cache)
3496 {
3497         struct root_record *rec;
3498         struct root_backref *backref;
3499
3500         rec = container_of(cache, struct root_record, cache);
3501         while (!list_empty(&rec->backrefs)) {
3502                 backref = to_root_backref(rec->backrefs.next);
3503                 list_del(&backref->list);
3504                 free(backref);
3505         }
3506
3507         free(rec);
3508 }
3509
3510 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3511
3512 static int add_root_backref(struct cache_tree *root_cache,
3513                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3514                             const char *name, int namelen,
3515                             int item_type, int errors)
3516 {
3517         struct root_record *rec;
3518         struct root_backref *backref;
3519
3520         rec = get_root_rec(root_cache, root_id);
3521         BUG_ON(IS_ERR(rec));
3522         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3523         BUG_ON(!backref);
3524
3525         backref->errors |= errors;
3526
3527         if (item_type != BTRFS_DIR_ITEM_KEY) {
3528                 if (backref->found_dir_index || backref->found_back_ref ||
3529                     backref->found_forward_ref) {
3530                         if (backref->index != index)
3531                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3532                 } else {
3533                         backref->index = index;
3534                 }
3535         }
3536
3537         if (item_type == BTRFS_DIR_ITEM_KEY) {
3538                 if (backref->found_forward_ref)
3539                         rec->found_ref++;
3540                 backref->found_dir_item = 1;
3541         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3542                 backref->found_dir_index = 1;
3543         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3544                 if (backref->found_forward_ref)
3545                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3546                 else if (backref->found_dir_item)
3547                         rec->found_ref++;
3548                 backref->found_forward_ref = 1;
3549         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3550                 if (backref->found_back_ref)
3551                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3552                 backref->found_back_ref = 1;
3553         } else {
3554                 BUG_ON(1);
3555         }
3556
3557         if (backref->found_forward_ref && backref->found_dir_item)
3558                 backref->reachable = 1;
3559         return 0;
3560 }
3561
3562 static int merge_root_recs(struct btrfs_root *root,
3563                            struct cache_tree *src_cache,
3564                            struct cache_tree *dst_cache)
3565 {
3566         struct cache_extent *cache;
3567         struct ptr_node *node;
3568         struct inode_record *rec;
3569         struct inode_backref *backref;
3570         int ret = 0;
3571
3572         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3573                 free_inode_recs_tree(src_cache);
3574                 return 0;
3575         }
3576
3577         while (1) {
3578                 cache = search_cache_extent(src_cache, 0);
3579                 if (!cache)
3580                         break;
3581                 node = container_of(cache, struct ptr_node, cache);
3582                 rec = node->data;
3583                 remove_cache_extent(src_cache, &node->cache);
3584                 free(node);
3585
3586                 ret = is_child_root(root, root->objectid, rec->ino);
3587                 if (ret < 0)
3588                         break;
3589                 else if (ret == 0)
3590                         goto skip;
3591
3592                 list_for_each_entry(backref, &rec->backrefs, list) {
3593                         BUG_ON(backref->found_inode_ref);
3594                         if (backref->found_dir_item)
3595                                 add_root_backref(dst_cache, rec->ino,
3596                                         root->root_key.objectid, backref->dir,
3597                                         backref->index, backref->name,
3598                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3599                                         backref->errors);
3600                         if (backref->found_dir_index)
3601                                 add_root_backref(dst_cache, rec->ino,
3602                                         root->root_key.objectid, backref->dir,
3603                                         backref->index, backref->name,
3604                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3605                                         backref->errors);
3606                 }
3607 skip:
3608                 free_inode_rec(rec);
3609         }
3610         if (ret < 0)
3611                 return ret;
3612         return 0;
3613 }
3614
3615 static int check_root_refs(struct btrfs_root *root,
3616                            struct cache_tree *root_cache)
3617 {
3618         struct root_record *rec;
3619         struct root_record *ref_root;
3620         struct root_backref *backref;
3621         struct cache_extent *cache;
3622         int loop = 1;
3623         int ret;
3624         int error;
3625         int errors = 0;
3626
3627         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3628         BUG_ON(IS_ERR(rec));
3629         rec->found_ref = 1;
3630
3631         /* fixme: this can not detect circular references */
3632         while (loop) {
3633                 loop = 0;
3634                 cache = search_cache_extent(root_cache, 0);
3635                 while (1) {
3636                         if (!cache)
3637                                 break;
3638                         rec = container_of(cache, struct root_record, cache);
3639                         cache = next_cache_extent(cache);
3640
3641                         if (rec->found_ref == 0)
3642                                 continue;
3643
3644                         list_for_each_entry(backref, &rec->backrefs, list) {
3645                                 if (!backref->reachable)
3646                                         continue;
3647
3648                                 ref_root = get_root_rec(root_cache,
3649                                                         backref->ref_root);
3650                                 BUG_ON(IS_ERR(ref_root));
3651                                 if (ref_root->found_ref > 0)
3652                                         continue;
3653
3654                                 backref->reachable = 0;
3655                                 rec->found_ref--;
3656                                 if (rec->found_ref == 0)
3657                                         loop = 1;
3658                         }
3659                 }
3660         }
3661
3662         cache = search_cache_extent(root_cache, 0);
3663         while (1) {
3664                 if (!cache)
3665                         break;
3666                 rec = container_of(cache, struct root_record, cache);
3667                 cache = next_cache_extent(cache);
3668
3669                 if (rec->found_ref == 0 &&
3670                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3671                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3672                         ret = check_orphan_item(root->fs_info->tree_root,
3673                                                 rec->objectid);
3674                         if (ret == 0)
3675                                 continue;
3676
3677                         /*
3678                          * If we don't have a root item then we likely just have
3679                          * a dir item in a snapshot for this root but no actual
3680                          * ref key or anything so it's meaningless.
3681                          */
3682                         if (!rec->found_root_item)
3683                                 continue;
3684                         errors++;
3685                         fprintf(stderr, "fs tree %llu not referenced\n",
3686                                 (unsigned long long)rec->objectid);
3687                 }
3688
3689                 error = 0;
3690                 if (rec->found_ref > 0 && !rec->found_root_item)
3691                         error = 1;
3692                 list_for_each_entry(backref, &rec->backrefs, list) {
3693                         if (!backref->found_dir_item)
3694                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3695                         if (!backref->found_dir_index)
3696                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3697                         if (!backref->found_back_ref)
3698                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3699                         if (!backref->found_forward_ref)
3700                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3701                         if (backref->reachable && backref->errors)
3702                                 error = 1;
3703                 }
3704                 if (!error)
3705                         continue;
3706
3707                 errors++;
3708                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3709                         (unsigned long long)rec->objectid, rec->found_ref,
3710                          rec->found_root_item ? "" : "not found");
3711
3712                 list_for_each_entry(backref, &rec->backrefs, list) {
3713                         if (!backref->reachable)
3714                                 continue;
3715                         if (!backref->errors && rec->found_root_item)
3716                                 continue;
3717                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3718                                 " index %llu namelen %u name %s errors %x\n",
3719                                 (unsigned long long)backref->ref_root,
3720                                 (unsigned long long)backref->dir,
3721                                 (unsigned long long)backref->index,
3722                                 backref->namelen, backref->name,
3723                                 backref->errors);
3724                         print_ref_error(backref->errors);
3725                 }
3726         }
3727         return errors > 0 ? 1 : 0;
3728 }
3729
3730 static int process_root_ref(struct extent_buffer *eb, int slot,
3731                             struct btrfs_key *key,
3732                             struct cache_tree *root_cache)
3733 {
3734         u64 dirid;
3735         u64 index;
3736         u32 len;
3737         u32 name_len;
3738         struct btrfs_root_ref *ref;
3739         char namebuf[BTRFS_NAME_LEN];
3740         int error;
3741
3742         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3743
3744         dirid = btrfs_root_ref_dirid(eb, ref);
3745         index = btrfs_root_ref_sequence(eb, ref);
3746         name_len = btrfs_root_ref_name_len(eb, ref);
3747
3748         if (name_len <= BTRFS_NAME_LEN) {
3749                 len = name_len;
3750                 error = 0;
3751         } else {
3752                 len = BTRFS_NAME_LEN;
3753                 error = REF_ERR_NAME_TOO_LONG;
3754         }
3755         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3756
3757         if (key->type == BTRFS_ROOT_REF_KEY) {
3758                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3759                                  index, namebuf, len, key->type, error);
3760         } else {
3761                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3762                                  index, namebuf, len, key->type, error);
3763         }
3764         return 0;
3765 }
3766
3767 static void free_corrupt_block(struct cache_extent *cache)
3768 {
3769         struct btrfs_corrupt_block *corrupt;
3770
3771         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3772         free(corrupt);
3773 }
3774
3775 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3776
3777 /*
3778  * Repair the btree of the given root.
3779  *
3780  * The fix is to remove the node key in corrupt_blocks cache_tree.
3781  * and rebalance the tree.
3782  * After the fix, the btree should be writeable.
3783  */
3784 static int repair_btree(struct btrfs_root *root,
3785                         struct cache_tree *corrupt_blocks)
3786 {
3787         struct btrfs_trans_handle *trans;
3788         struct btrfs_path path;
3789         struct btrfs_corrupt_block *corrupt;
3790         struct cache_extent *cache;
3791         struct btrfs_key key;
3792         u64 offset;
3793         int level;
3794         int ret = 0;
3795
3796         if (cache_tree_empty(corrupt_blocks))
3797                 return 0;
3798
3799         trans = btrfs_start_transaction(root, 1);
3800         if (IS_ERR(trans)) {
3801                 ret = PTR_ERR(trans);
3802                 fprintf(stderr, "Error starting transaction: %s\n",
3803                         strerror(-ret));
3804                 return ret;
3805         }
3806         btrfs_init_path(&path);
3807         cache = first_cache_extent(corrupt_blocks);
3808         while (cache) {
3809                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3810                                        cache);
3811                 level = corrupt->level;
3812                 path.lowest_level = level;
3813                 key.objectid = corrupt->key.objectid;
3814                 key.type = corrupt->key.type;
3815                 key.offset = corrupt->key.offset;
3816
3817                 /*
3818                  * Here we don't want to do any tree balance, since it may
3819                  * cause a balance with corrupted brother leaf/node,
3820                  * so ins_len set to 0 here.
3821                  * Balance will be done after all corrupt node/leaf is deleted.
3822                  */
3823                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3824                 if (ret < 0)
3825                         goto out;
3826                 offset = btrfs_node_blockptr(path.nodes[level],
3827                                              path.slots[level]);
3828
3829                 /* Remove the ptr */
3830                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3831                 if (ret < 0)
3832                         goto out;
3833                 /*
3834                  * Remove the corresponding extent
3835                  * return value is not concerned.
3836                  */
3837                 btrfs_release_path(&path);
3838                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3839                                         0, root->root_key.objectid,
3840                                         level - 1, 0);
3841                 cache = next_cache_extent(cache);
3842         }
3843
3844         /* Balance the btree using btrfs_search_slot() */
3845         cache = first_cache_extent(corrupt_blocks);
3846         while (cache) {
3847                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3848                                        cache);
3849                 memcpy(&key, &corrupt->key, sizeof(key));
3850                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3851                 if (ret < 0)
3852                         goto out;
3853                 /* return will always >0 since it won't find the item */
3854                 ret = 0;
3855                 btrfs_release_path(&path);
3856                 cache = next_cache_extent(cache);
3857         }
3858 out:
3859         btrfs_commit_transaction(trans, root);
3860         btrfs_release_path(&path);
3861         return ret;
3862 }
3863
3864 static int check_fs_root(struct btrfs_root *root,
3865                          struct cache_tree *root_cache,
3866                          struct walk_control *wc)
3867 {
3868         int ret = 0;
3869         int err = 0;
3870         int wret;
3871         int level;
3872         struct btrfs_path path;
3873         struct shared_node root_node;
3874         struct root_record *rec;
3875         struct btrfs_root_item *root_item = &root->root_item;
3876         struct cache_tree corrupt_blocks;
3877         struct orphan_data_extent *orphan;
3878         struct orphan_data_extent *tmp;
3879         enum btrfs_tree_block_status status;
3880         struct node_refs nrefs;
3881
3882         /*
3883          * Reuse the corrupt_block cache tree to record corrupted tree block
3884          *
3885          * Unlike the usage in extent tree check, here we do it in a per
3886          * fs/subvol tree base.
3887          */
3888         cache_tree_init(&corrupt_blocks);
3889         root->fs_info->corrupt_blocks = &corrupt_blocks;
3890
3891         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3892                 rec = get_root_rec(root_cache, root->root_key.objectid);
3893                 BUG_ON(IS_ERR(rec));
3894                 if (btrfs_root_refs(root_item) > 0)
3895                         rec->found_root_item = 1;
3896         }
3897
3898         btrfs_init_path(&path);
3899         memset(&root_node, 0, sizeof(root_node));
3900         cache_tree_init(&root_node.root_cache);
3901         cache_tree_init(&root_node.inode_cache);
3902         memset(&nrefs, 0, sizeof(nrefs));
3903
3904         /* Move the orphan extent record to corresponding inode_record */
3905         list_for_each_entry_safe(orphan, tmp,
3906                                  &root->orphan_data_extents, list) {
3907                 struct inode_record *inode;
3908
3909                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3910                                       1);
3911                 BUG_ON(IS_ERR(inode));
3912                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3913                 list_move(&orphan->list, &inode->orphan_extents);
3914         }
3915
3916         level = btrfs_header_level(root->node);
3917         memset(wc->nodes, 0, sizeof(wc->nodes));
3918         wc->nodes[level] = &root_node;
3919         wc->active_node = level;
3920         wc->root_level = level;
3921
3922         /* We may not have checked the root block, lets do that now */
3923         if (btrfs_is_leaf(root->node))
3924                 status = btrfs_check_leaf(root, NULL, root->node);
3925         else
3926                 status = btrfs_check_node(root, NULL, root->node);
3927         if (status != BTRFS_TREE_BLOCK_CLEAN)
3928                 return -EIO;
3929
3930         if (btrfs_root_refs(root_item) > 0 ||
3931             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3932                 path.nodes[level] = root->node;
3933                 extent_buffer_get(root->node);
3934                 path.slots[level] = 0;
3935         } else {
3936                 struct btrfs_key key;
3937                 struct btrfs_disk_key found_key;
3938
3939                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3940                 level = root_item->drop_level;
3941                 path.lowest_level = level;
3942                 if (level > btrfs_header_level(root->node) ||
3943                     level >= BTRFS_MAX_LEVEL) {
3944                         error("ignoring invalid drop level: %u", level);
3945                         goto skip_walking;
3946                 }
3947                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3948                 if (wret < 0)
3949                         goto skip_walking;
3950                 btrfs_node_key(path.nodes[level], &found_key,
3951                                 path.slots[level]);
3952                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3953                                         sizeof(found_key)));
3954         }
3955
3956         while (1) {
3957                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3958                 if (wret < 0)
3959                         ret = wret;
3960                 if (wret != 0)
3961                         break;
3962
3963                 wret = walk_up_tree(root, &path, wc, &level);
3964                 if (wret < 0)
3965                         ret = wret;
3966                 if (wret != 0)
3967                         break;
3968         }
3969 skip_walking:
3970         btrfs_release_path(&path);
3971
3972         if (!cache_tree_empty(&corrupt_blocks)) {
3973                 struct cache_extent *cache;
3974                 struct btrfs_corrupt_block *corrupt;
3975
3976                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3977                        root->root_key.objectid);
3978                 cache = first_cache_extent(&corrupt_blocks);
3979                 while (cache) {
3980                         corrupt = container_of(cache,
3981                                                struct btrfs_corrupt_block,
3982                                                cache);
3983                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3984                                cache->start, corrupt->level,
3985                                corrupt->key.objectid, corrupt->key.type,
3986                                corrupt->key.offset);
3987                         cache = next_cache_extent(cache);
3988                 }
3989                 if (repair) {
3990                         printf("Try to repair the btree for root %llu\n",
3991                                root->root_key.objectid);
3992                         ret = repair_btree(root, &corrupt_blocks);
3993                         if (ret < 0)
3994                                 fprintf(stderr, "Failed to repair btree: %s\n",
3995                                         strerror(-ret));
3996                         if (!ret)
3997                                 printf("Btree for root %llu is fixed\n",
3998                                        root->root_key.objectid);
3999                 }
4000         }
4001
4002         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4003         if (err < 0)
4004                 ret = err;
4005
4006         if (root_node.current) {
4007                 root_node.current->checked = 1;
4008                 maybe_free_inode_rec(&root_node.inode_cache,
4009                                 root_node.current);
4010         }
4011
4012         err = check_inode_recs(root, &root_node.inode_cache);
4013         if (!ret)
4014                 ret = err;
4015
4016         free_corrupt_blocks_tree(&corrupt_blocks);
4017         root->fs_info->corrupt_blocks = NULL;
4018         free_orphan_data_extents(&root->orphan_data_extents);
4019         return ret;
4020 }
4021
4022 static int fs_root_objectid(u64 objectid)
4023 {
4024         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4025             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4026                 return 1;
4027         return is_fstree(objectid);
4028 }
4029
4030 static int check_fs_roots(struct btrfs_root *root,
4031                           struct cache_tree *root_cache)
4032 {
4033         struct btrfs_path path;
4034         struct btrfs_key key;
4035         struct walk_control wc;
4036         struct extent_buffer *leaf, *tree_node;
4037         struct btrfs_root *tmp_root;
4038         struct btrfs_root *tree_root = root->fs_info->tree_root;
4039         int ret;
4040         int err = 0;
4041
4042         if (ctx.progress_enabled) {
4043                 ctx.tp = TASK_FS_ROOTS;
4044                 task_start(ctx.info);
4045         }
4046
4047         /*
4048          * Just in case we made any changes to the extent tree that weren't
4049          * reflected into the free space cache yet.
4050          */
4051         if (repair)
4052                 reset_cached_block_groups(root->fs_info);
4053         memset(&wc, 0, sizeof(wc));
4054         cache_tree_init(&wc.shared);
4055         btrfs_init_path(&path);
4056
4057 again:
4058         key.offset = 0;
4059         key.objectid = 0;
4060         key.type = BTRFS_ROOT_ITEM_KEY;
4061         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4062         if (ret < 0) {
4063                 err = 1;
4064                 goto out;
4065         }
4066         tree_node = tree_root->node;
4067         while (1) {
4068                 if (tree_node != tree_root->node) {
4069                         free_root_recs_tree(root_cache);
4070                         btrfs_release_path(&path);
4071                         goto again;
4072                 }
4073                 leaf = path.nodes[0];
4074                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4075                         ret = btrfs_next_leaf(tree_root, &path);
4076                         if (ret) {
4077                                 if (ret < 0)
4078                                         err = 1;
4079                                 break;
4080                         }
4081                         leaf = path.nodes[0];
4082                 }
4083                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4084                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4085                     fs_root_objectid(key.objectid)) {
4086                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4087                                 tmp_root = btrfs_read_fs_root_no_cache(
4088                                                 root->fs_info, &key);
4089                         } else {
4090                                 key.offset = (u64)-1;
4091                                 tmp_root = btrfs_read_fs_root(
4092                                                 root->fs_info, &key);
4093                         }
4094                         if (IS_ERR(tmp_root)) {
4095                                 err = 1;
4096                                 goto next;
4097                         }
4098                         ret = check_fs_root(tmp_root, root_cache, &wc);
4099                         if (ret == -EAGAIN) {
4100                                 free_root_recs_tree(root_cache);
4101                                 btrfs_release_path(&path);
4102                                 goto again;
4103                         }
4104                         if (ret)
4105                                 err = 1;
4106                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4107                                 btrfs_free_fs_root(tmp_root);
4108                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4109                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4110                         process_root_ref(leaf, path.slots[0], &key,
4111                                          root_cache);
4112                 }
4113 next:
4114                 path.slots[0]++;
4115         }
4116 out:
4117         btrfs_release_path(&path);
4118         if (err)
4119                 free_extent_cache_tree(&wc.shared);
4120         if (!cache_tree_empty(&wc.shared))
4121                 fprintf(stderr, "warning line %d\n", __LINE__);
4122
4123         task_stop(ctx.info);
4124
4125         return err;
4126 }
4127
4128 /*
4129  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4130  * INODE_REF/INODE_EXTREF match.
4131  *
4132  * @root:       the root of the fs/file tree
4133  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4134  * @key:        the key of the DIR_ITEM/DIR_INDEX
4135  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4136  *              distinguish root_dir between normal dir/file
4137  * @name:       the name in the INODE_REF/INODE_EXTREF
4138  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4139  * @mode:       the st_mode of INODE_ITEM
4140  *
4141  * Return 0 if no error occurred.
4142  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4143  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4144  * dir/file.
4145  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4146  * not match for normal dir/file.
4147  */
4148 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4149                          struct btrfs_key *key, u64 index, char *name,
4150                          u32 namelen, u32 mode)
4151 {
4152         struct btrfs_path path;
4153         struct extent_buffer *node;
4154         struct btrfs_dir_item *di;
4155         struct btrfs_key location;
4156         char namebuf[BTRFS_NAME_LEN] = {0};
4157         u32 total;
4158         u32 cur = 0;
4159         u32 len;
4160         u32 name_len;
4161         u32 data_len;
4162         u8 filetype;
4163         int slot;
4164         int ret;
4165
4166         btrfs_init_path(&path);
4167         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4168         if (ret < 0) {
4169                 ret = DIR_ITEM_MISSING;
4170                 goto out;
4171         }
4172
4173         /* Process root dir and goto out*/
4174         if (index == 0) {
4175                 if (ret == 0) {
4176                         ret = ROOT_DIR_ERROR;
4177                         error(
4178                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4179                                 root->objectid,
4180                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4181                                         "REF" : "EXTREF",
4182                                 ref_key->objectid, ref_key->offset,
4183                                 key->type == BTRFS_DIR_ITEM_KEY ?
4184                                         "DIR_ITEM" : "DIR_INDEX");
4185                 } else {
4186                         ret = 0;
4187                 }
4188
4189                 goto out;
4190         }
4191
4192         /* Process normal file/dir */
4193         if (ret > 0) {
4194                 ret = DIR_ITEM_MISSING;
4195                 error(
4196                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4197                         root->objectid,
4198                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4199                         ref_key->objectid, ref_key->offset,
4200                         key->type == BTRFS_DIR_ITEM_KEY ?
4201                                 "DIR_ITEM" : "DIR_INDEX",
4202                         key->objectid, key->offset, namelen, name,
4203                         imode_to_type(mode));
4204                 goto out;
4205         }
4206
4207         /* Check whether inode_id/filetype/name match */
4208         node = path.nodes[0];
4209         slot = path.slots[0];
4210         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4211         total = btrfs_item_size_nr(node, slot);
4212         while (cur < total) {
4213                 ret = DIR_ITEM_MISMATCH;
4214                 name_len = btrfs_dir_name_len(node, di);
4215                 data_len = btrfs_dir_data_len(node, di);
4216
4217                 btrfs_dir_item_key_to_cpu(node, di, &location);
4218                 if (location.objectid != ref_key->objectid ||
4219                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4220                     location.offset != 0)
4221                         goto next;
4222
4223                 filetype = btrfs_dir_type(node, di);
4224                 if (imode_to_type(mode) != filetype)
4225                         goto next;
4226
4227                 if (name_len <= BTRFS_NAME_LEN) {
4228                         len = name_len;
4229                 } else {
4230                         len = BTRFS_NAME_LEN;
4231                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4232                         root->objectid,
4233                         key->type == BTRFS_DIR_ITEM_KEY ?
4234                         "DIR_ITEM" : "DIR_INDEX",
4235                         key->objectid, key->offset, name_len);
4236                 }
4237                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4238                 if (len != namelen || strncmp(namebuf, name, len))
4239                         goto next;
4240
4241                 ret = 0;
4242                 goto out;
4243 next:
4244                 len = sizeof(*di) + name_len + data_len;
4245                 di = (struct btrfs_dir_item *)((char *)di + len);
4246                 cur += len;
4247         }
4248         if (ret == DIR_ITEM_MISMATCH)
4249                 error(
4250                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4251                         root->objectid,
4252                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4253                         ref_key->objectid, ref_key->offset,
4254                         key->type == BTRFS_DIR_ITEM_KEY ?
4255                                 "DIR_ITEM" : "DIR_INDEX",
4256                         key->objectid, key->offset, namelen, name,
4257                         imode_to_type(mode));
4258 out:
4259         btrfs_release_path(&path);
4260         return ret;
4261 }
4262
4263 /*
4264  * Traverse the given INODE_REF and call find_dir_item() to find related
4265  * DIR_ITEM/DIR_INDEX.
4266  *
4267  * @root:       the root of the fs/file tree
4268  * @ref_key:    the key of the INODE_REF
4269  * @refs:       the count of INODE_REF
4270  * @mode:       the st_mode of INODE_ITEM
4271  *
4272  * Return 0 if no error occurred.
4273  */
4274 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4275                            struct extent_buffer *node, int slot, u64 *refs,
4276                            int mode)
4277 {
4278         struct btrfs_key key;
4279         struct btrfs_inode_ref *ref;
4280         char namebuf[BTRFS_NAME_LEN] = {0};
4281         u32 total;
4282         u32 cur = 0;
4283         u32 len;
4284         u32 name_len;
4285         u64 index;
4286         int ret, err = 0;
4287
4288         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4289         total = btrfs_item_size_nr(node, slot);
4290
4291 next:
4292         /* Update inode ref count */
4293         (*refs)++;
4294
4295         index = btrfs_inode_ref_index(node, ref);
4296         name_len = btrfs_inode_ref_name_len(node, ref);
4297         if (name_len <= BTRFS_NAME_LEN) {
4298                 len = name_len;
4299         } else {
4300                 len = BTRFS_NAME_LEN;
4301                 warning("root %llu INODE_REF[%llu %llu] name too long",
4302                         root->objectid, ref_key->objectid, ref_key->offset);
4303         }
4304
4305         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4306
4307         /* Check root dir ref name */
4308         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4309                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4310                       root->objectid, ref_key->objectid, ref_key->offset,
4311                       namebuf);
4312                 err |= ROOT_DIR_ERROR;
4313         }
4314
4315         /* Find related DIR_INDEX */
4316         key.objectid = ref_key->offset;
4317         key.type = BTRFS_DIR_INDEX_KEY;
4318         key.offset = index;
4319         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4320         err |= ret;
4321
4322         /* Find related dir_item */
4323         key.objectid = ref_key->offset;
4324         key.type = BTRFS_DIR_ITEM_KEY;
4325         key.offset = btrfs_name_hash(namebuf, len);
4326         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4327         err |= ret;
4328
4329         len = sizeof(*ref) + name_len;
4330         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4331         cur += len;
4332         if (cur < total)
4333                 goto next;
4334
4335         return err;
4336 }
4337
4338 /*
4339  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4340  * DIR_ITEM/DIR_INDEX.
4341  *
4342  * @root:       the root of the fs/file tree
4343  * @ref_key:    the key of the INODE_EXTREF
4344  * @refs:       the count of INODE_EXTREF
4345  * @mode:       the st_mode of INODE_ITEM
4346  *
4347  * Return 0 if no error occurred.
4348  */
4349 static int check_inode_extref(struct btrfs_root *root,
4350                               struct btrfs_key *ref_key,
4351                               struct extent_buffer *node, int slot, u64 *refs,
4352                               int mode)
4353 {
4354         struct btrfs_key key;
4355         struct btrfs_inode_extref *extref;
4356         char namebuf[BTRFS_NAME_LEN] = {0};
4357         u32 total;
4358         u32 cur = 0;
4359         u32 len;
4360         u32 name_len;
4361         u64 index;
4362         u64 parent;
4363         int ret;
4364         int err = 0;
4365
4366         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4367         total = btrfs_item_size_nr(node, slot);
4368
4369 next:
4370         /* update inode ref count */
4371         (*refs)++;
4372         name_len = btrfs_inode_extref_name_len(node, extref);
4373         index = btrfs_inode_extref_index(node, extref);
4374         parent = btrfs_inode_extref_parent(node, extref);
4375         if (name_len <= BTRFS_NAME_LEN) {
4376                 len = name_len;
4377         } else {
4378                 len = BTRFS_NAME_LEN;
4379                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4380                         root->objectid, ref_key->objectid, ref_key->offset);
4381         }
4382         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4383
4384         /* Check root dir ref name */
4385         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4386                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4387                       root->objectid, ref_key->objectid, ref_key->offset,
4388                       namebuf);
4389                 err |= ROOT_DIR_ERROR;
4390         }
4391
4392         /* find related dir_index */
4393         key.objectid = parent;
4394         key.type = BTRFS_DIR_INDEX_KEY;
4395         key.offset = index;
4396         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4397         err |= ret;
4398
4399         /* find related dir_item */
4400         key.objectid = parent;
4401         key.type = BTRFS_DIR_ITEM_KEY;
4402         key.offset = btrfs_name_hash(namebuf, len);
4403         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4404         err |= ret;
4405
4406         len = sizeof(*extref) + name_len;
4407         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4408         cur += len;
4409
4410         if (cur < total)
4411                 goto next;
4412
4413         return err;
4414 }
4415
4416 /*
4417  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4418  * DIR_ITEM/DIR_INDEX match.
4419  *
4420  * @root:       the root of the fs/file tree
4421  * @key:        the key of the INODE_REF/INODE_EXTREF
4422  * @name:       the name in the INODE_REF/INODE_EXTREF
4423  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4424  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4425  * to (u64)-1
4426  * @ext_ref:    the EXTENDED_IREF feature
4427  *
4428  * Return 0 if no error occurred.
4429  * Return >0 for error bitmap
4430  */
4431 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4432                           char *name, int namelen, u64 index,
4433                           unsigned int ext_ref)
4434 {
4435         struct btrfs_path path;
4436         struct btrfs_inode_ref *ref;
4437         struct btrfs_inode_extref *extref;
4438         struct extent_buffer *node;
4439         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4440         u32 total;
4441         u32 cur = 0;
4442         u32 len;
4443         u32 ref_namelen;
4444         u64 ref_index;
4445         u64 parent;
4446         u64 dir_id;
4447         int slot;
4448         int ret;
4449
4450         btrfs_init_path(&path);
4451         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4452         if (ret) {
4453                 ret = INODE_REF_MISSING;
4454                 goto extref;
4455         }
4456
4457         node = path.nodes[0];
4458         slot = path.slots[0];
4459
4460         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4461         total = btrfs_item_size_nr(node, slot);
4462
4463         /* Iterate all entry of INODE_REF */
4464         while (cur < total) {
4465                 ret = INODE_REF_MISSING;
4466
4467                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4468                 ref_index = btrfs_inode_ref_index(node, ref);
4469                 if (index != (u64)-1 && index != ref_index)
4470                         goto next_ref;
4471
4472                 if (ref_namelen <= BTRFS_NAME_LEN) {
4473                         len = ref_namelen;
4474                 } else {
4475                         len = BTRFS_NAME_LEN;
4476                         warning("root %llu INODE %s[%llu %llu] name too long",
4477                                 root->objectid,
4478                                 key->type == BTRFS_INODE_REF_KEY ?
4479                                         "REF" : "EXTREF",
4480                                 key->objectid, key->offset);
4481                 }
4482                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4483                                    len);
4484
4485                 if (len != namelen || strncmp(ref_namebuf, name, len))
4486                         goto next_ref;
4487
4488                 ret = 0;
4489                 goto out;
4490 next_ref:
4491                 len = sizeof(*ref) + ref_namelen;
4492                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4493                 cur += len;
4494         }
4495
4496 extref:
4497         /* Skip if not support EXTENDED_IREF feature */
4498         if (!ext_ref)
4499                 goto out;
4500
4501         btrfs_release_path(&path);
4502         btrfs_init_path(&path);
4503
4504         dir_id = key->offset;
4505         key->type = BTRFS_INODE_EXTREF_KEY;
4506         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4507
4508         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4509         if (ret) {
4510                 ret = INODE_REF_MISSING;
4511                 goto out;
4512         }
4513
4514         node = path.nodes[0];
4515         slot = path.slots[0];
4516
4517         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4518         cur = 0;
4519         total = btrfs_item_size_nr(node, slot);
4520
4521         /* Iterate all entry of INODE_EXTREF */
4522         while (cur < total) {
4523                 ret = INODE_REF_MISSING;
4524
4525                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4526                 ref_index = btrfs_inode_extref_index(node, extref);
4527                 parent = btrfs_inode_extref_parent(node, extref);
4528                 if (index != (u64)-1 && index != ref_index)
4529                         goto next_extref;
4530
4531                 if (parent != dir_id)
4532                         goto next_extref;
4533
4534                 if (ref_namelen <= BTRFS_NAME_LEN) {
4535                         len = ref_namelen;
4536                 } else {
4537                         len = BTRFS_NAME_LEN;
4538                         warning("root %llu INODE %s[%llu %llu] name too long",
4539                                 root->objectid,
4540                                 key->type == BTRFS_INODE_REF_KEY ?
4541                                         "REF" : "EXTREF",
4542                                 key->objectid, key->offset);
4543                 }
4544                 read_extent_buffer(node, ref_namebuf,
4545                                    (unsigned long)(extref + 1), len);
4546
4547                 if (len != namelen || strncmp(ref_namebuf, name, len))
4548                         goto next_extref;
4549
4550                 ret = 0;
4551                 goto out;
4552
4553 next_extref:
4554                 len = sizeof(*extref) + ref_namelen;
4555                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4556                 cur += len;
4557
4558         }
4559 out:
4560         btrfs_release_path(&path);
4561         return ret;
4562 }
4563
4564 /*
4565  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4566  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4567  *
4568  * @root:       the root of the fs/file tree
4569  * @key:        the key of the INODE_REF/INODE_EXTREF
4570  * @size:       the st_size of the INODE_ITEM
4571  * @ext_ref:    the EXTENDED_IREF feature
4572  *
4573  * Return 0 if no error occurred.
4574  */
4575 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4576                           struct extent_buffer *node, int slot, u64 *size,
4577                           unsigned int ext_ref)
4578 {
4579         struct btrfs_dir_item *di;
4580         struct btrfs_inode_item *ii;
4581         struct btrfs_path path;
4582         struct btrfs_key location;
4583         char namebuf[BTRFS_NAME_LEN] = {0};
4584         u32 total;
4585         u32 cur = 0;
4586         u32 len;
4587         u32 name_len;
4588         u32 data_len;
4589         u8 filetype;
4590         u32 mode;
4591         u64 index;
4592         int ret;
4593         int err = 0;
4594
4595         /*
4596          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4597          * ignore index check.
4598          */
4599         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4600
4601         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4602         total = btrfs_item_size_nr(node, slot);
4603
4604         while (cur < total) {
4605                 data_len = btrfs_dir_data_len(node, di);
4606                 if (data_len)
4607                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4608                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4609                               "DIR_ITEM" : "DIR_INDEX",
4610                               key->objectid, key->offset, data_len);
4611
4612                 name_len = btrfs_dir_name_len(node, di);
4613                 if (name_len <= BTRFS_NAME_LEN) {
4614                         len = name_len;
4615                 } else {
4616                         len = BTRFS_NAME_LEN;
4617                         warning("root %llu %s[%llu %llu] name too long",
4618                                 root->objectid,
4619                                 key->type == BTRFS_DIR_ITEM_KEY ?
4620                                 "DIR_ITEM" : "DIR_INDEX",
4621                                 key->objectid, key->offset);
4622                 }
4623                 (*size) += name_len;
4624
4625                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4626                 filetype = btrfs_dir_type(node, di);
4627
4628                 btrfs_init_path(&path);
4629                 btrfs_dir_item_key_to_cpu(node, di, &location);
4630
4631                 /* Ignore related ROOT_ITEM check */
4632                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4633                         goto next;
4634
4635                 /* Check relative INODE_ITEM(existence/filetype) */
4636                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4637                 if (ret) {
4638                         err |= INODE_ITEM_MISSING;
4639                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4640                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4641                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4642                               key->offset, location.objectid, name_len,
4643                               namebuf, filetype);
4644                         goto next;
4645                 }
4646
4647                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4648                                     struct btrfs_inode_item);
4649                 mode = btrfs_inode_mode(path.nodes[0], ii);
4650
4651                 if (imode_to_type(mode) != filetype) {
4652                         err |= INODE_ITEM_MISMATCH;
4653                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4654                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4655                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4656                               key->offset, name_len, namebuf, filetype);
4657                 }
4658
4659                 /* Check relative INODE_REF/INODE_EXTREF */
4660                 location.type = BTRFS_INODE_REF_KEY;
4661                 location.offset = key->objectid;
4662                 ret = find_inode_ref(root, &location, namebuf, len,
4663                                        index, ext_ref);
4664                 err |= ret;
4665                 if (ret & INODE_REF_MISSING)
4666                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4667                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4668                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4669                               key->offset, name_len, namebuf, filetype);
4670
4671 next:
4672                 btrfs_release_path(&path);
4673                 len = sizeof(*di) + name_len + data_len;
4674                 di = (struct btrfs_dir_item *)((char *)di + len);
4675                 cur += len;
4676
4677                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4678                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4679                               root->objectid, key->objectid, key->offset);
4680                         break;
4681                 }
4682         }
4683
4684         return err;
4685 }
4686
4687 /*
4688  * Check file extent datasum/hole, update the size of the file extents,
4689  * check and update the last offset of the file extent.
4690  *
4691  * @root:       the root of fs/file tree.
4692  * @fkey:       the key of the file extent.
4693  * @nodatasum:  INODE_NODATASUM feature.
4694  * @size:       the sum of all EXTENT_DATA items size for this inode.
4695  * @end:        the offset of the last extent.
4696  *
4697  * Return 0 if no error occurred.
4698  */
4699 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4700                              struct extent_buffer *node, int slot,
4701                              unsigned int nodatasum, u64 *size, u64 *end)
4702 {
4703         struct btrfs_file_extent_item *fi;
4704         u64 disk_bytenr;
4705         u64 disk_num_bytes;
4706         u64 extent_num_bytes;
4707         u64 extent_offset;
4708         u64 csum_found;         /* In byte size, sectorsize aligned */
4709         u64 search_start;       /* Logical range start we search for csum */
4710         u64 search_len;         /* Logical range len we search for csum */
4711         unsigned int extent_type;
4712         unsigned int is_hole;
4713         int compressed = 0;
4714         int ret;
4715         int err = 0;
4716
4717         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4718
4719         /* Check inline extent */
4720         extent_type = btrfs_file_extent_type(node, fi);
4721         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4722                 struct btrfs_item *e = btrfs_item_nr(slot);
4723                 u32 item_inline_len;
4724
4725                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4726                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4727                 compressed = btrfs_file_extent_compression(node, fi);
4728                 if (extent_num_bytes == 0) {
4729                         error(
4730                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4731                                 root->objectid, fkey->objectid, fkey->offset);
4732                         err |= FILE_EXTENT_ERROR;
4733                 }
4734                 if (!compressed && extent_num_bytes != item_inline_len) {
4735                         error(
4736                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4737                                 root->objectid, fkey->objectid, fkey->offset,
4738                                 extent_num_bytes, item_inline_len);
4739                         err |= FILE_EXTENT_ERROR;
4740                 }
4741                 *size += extent_num_bytes;
4742                 return err;
4743         }
4744
4745         /* Check extent type */
4746         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4747                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4748                 err |= FILE_EXTENT_ERROR;
4749                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4750                       root->objectid, fkey->objectid, fkey->offset);
4751                 return err;
4752         }
4753
4754         /* Check REG_EXTENT/PREALLOC_EXTENT */
4755         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4756         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4757         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4758         extent_offset = btrfs_file_extent_offset(node, fi);
4759         compressed = btrfs_file_extent_compression(node, fi);
4760         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4761
4762         /*
4763          * Check EXTENT_DATA csum
4764          *
4765          * For plain (uncompressed) extent, we should only check the range
4766          * we're referring to, as it's possible that part of prealloc extent
4767          * has been written, and has csum:
4768          *
4769          * |<--- Original large preallocated extent A ---->|
4770          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4771          *      No csum                         Has csum
4772          *
4773          * For compressed extent, we should check the whole range.
4774          */
4775         if (!compressed) {
4776                 search_start = disk_bytenr + extent_offset;
4777                 search_len = extent_num_bytes;
4778         } else {
4779                 search_start = disk_bytenr;
4780                 search_len = disk_num_bytes;
4781         }
4782         ret = count_csum_range(root, search_start, search_len, &csum_found);
4783         if (csum_found > 0 && nodatasum) {
4784                 err |= ODD_CSUM_ITEM;
4785                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4786                       root->objectid, fkey->objectid, fkey->offset);
4787         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4788                    !is_hole && (ret < 0 || csum_found < search_len)) {
4789                 err |= CSUM_ITEM_MISSING;
4790                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4791                       root->objectid, fkey->objectid, fkey->offset,
4792                       csum_found, search_len);
4793         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4794                 err |= ODD_CSUM_ITEM;
4795                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4796                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4797         }
4798
4799         /* Check EXTENT_DATA hole */
4800         if (no_holes && is_hole) {
4801                 err |= FILE_EXTENT_ERROR;
4802                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4803                       root->objectid, fkey->objectid, fkey->offset);
4804         } else if (!no_holes && *end != fkey->offset) {
4805                 err |= FILE_EXTENT_ERROR;
4806                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4807                       root->objectid, fkey->objectid, fkey->offset);
4808         }
4809
4810         *end += extent_num_bytes;
4811         if (!is_hole)
4812                 *size += extent_num_bytes;
4813
4814         return err;
4815 }
4816
4817 /*
4818  * Check INODE_ITEM and related ITEMs (the same inode number)
4819  * 1. check link count
4820  * 2. check inode ref/extref
4821  * 3. check dir item/index
4822  *
4823  * @ext_ref:    the EXTENDED_IREF feature
4824  *
4825  * Return 0 if no error occurred.
4826  * Return >0 for error or hit the traversal is done(by error bitmap)
4827  */
4828 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4829                             unsigned int ext_ref)
4830 {
4831         struct extent_buffer *node;
4832         struct btrfs_inode_item *ii;
4833         struct btrfs_key key;
4834         u64 inode_id;
4835         u32 mode;
4836         u64 nlink;
4837         u64 nbytes;
4838         u64 isize;
4839         u64 size = 0;
4840         u64 refs = 0;
4841         u64 extent_end = 0;
4842         u64 extent_size = 0;
4843         unsigned int dir;
4844         unsigned int nodatasum;
4845         int slot;
4846         int ret;
4847         int err = 0;
4848
4849         node = path->nodes[0];
4850         slot = path->slots[0];
4851
4852         btrfs_item_key_to_cpu(node, &key, slot);
4853         inode_id = key.objectid;
4854
4855         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4856                 ret = btrfs_next_item(root, path);
4857                 if (ret > 0)
4858                         err |= LAST_ITEM;
4859                 return err;
4860         }
4861
4862         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4863         isize = btrfs_inode_size(node, ii);
4864         nbytes = btrfs_inode_nbytes(node, ii);
4865         mode = btrfs_inode_mode(node, ii);
4866         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4867         nlink = btrfs_inode_nlink(node, ii);
4868         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4869
4870         while (1) {
4871                 ret = btrfs_next_item(root, path);
4872                 if (ret < 0) {
4873                         /* out will fill 'err' rusing current statistics */
4874                         goto out;
4875                 } else if (ret > 0) {
4876                         err |= LAST_ITEM;
4877                         goto out;
4878                 }
4879
4880                 node = path->nodes[0];
4881                 slot = path->slots[0];
4882                 btrfs_item_key_to_cpu(node, &key, slot);
4883                 if (key.objectid != inode_id)
4884                         goto out;
4885
4886                 switch (key.type) {
4887                 case BTRFS_INODE_REF_KEY:
4888                         ret = check_inode_ref(root, &key, node, slot, &refs,
4889                                               mode);
4890                         err |= ret;
4891                         break;
4892                 case BTRFS_INODE_EXTREF_KEY:
4893                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4894                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4895                                         root->objectid, key.objectid,
4896                                         key.offset);
4897                         ret = check_inode_extref(root, &key, node, slot, &refs,
4898                                                  mode);
4899                         err |= ret;
4900                         break;
4901                 case BTRFS_DIR_ITEM_KEY:
4902                 case BTRFS_DIR_INDEX_KEY:
4903                         if (!dir) {
4904                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4905                                         root->objectid, inode_id,
4906                                         imode_to_type(mode), key.objectid,
4907                                         key.offset);
4908                         }
4909                         ret = check_dir_item(root, &key, node, slot, &size,
4910                                              ext_ref);
4911                         err |= ret;
4912                         break;
4913                 case BTRFS_EXTENT_DATA_KEY:
4914                         if (dir) {
4915                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4916                                         root->objectid, inode_id, key.objectid,
4917                                         key.offset);
4918                         }
4919                         ret = check_file_extent(root, &key, node, slot,
4920                                                 nodatasum, &extent_size,
4921                                                 &extent_end);
4922                         err |= ret;
4923                         break;
4924                 case BTRFS_XATTR_ITEM_KEY:
4925                         break;
4926                 default:
4927                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4928                               key.objectid, key.type, key.offset);
4929                 }
4930         }
4931
4932 out:
4933         /* verify INODE_ITEM nlink/isize/nbytes */
4934         if (dir) {
4935                 if (nlink != 1) {
4936                         err |= LINK_COUNT_ERROR;
4937                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4938                               root->objectid, inode_id, nlink);
4939                 }
4940
4941                 /*
4942                  * Just a warning, as dir inode nbytes is just an
4943                  * instructive value.
4944                  */
4945                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4946                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4947                                 root->objectid, inode_id, root->nodesize);
4948                 }
4949
4950                 if (isize != size) {
4951                         err |= ISIZE_ERROR;
4952                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4953                               root->objectid, inode_id, isize, size);
4954                 }
4955         } else {
4956                 if (nlink != refs) {
4957                         err |= LINK_COUNT_ERROR;
4958                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4959                               root->objectid, inode_id, nlink, refs);
4960                 } else if (!nlink) {
4961                         err |= ORPHAN_ITEM;
4962                 }
4963
4964                 if (!nbytes && !no_holes && extent_end < isize) {
4965                         err |= NBYTES_ERROR;
4966                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4967                               root->objectid, inode_id, isize);
4968                 }
4969
4970                 if (nbytes != extent_size) {
4971                         err |= NBYTES_ERROR;
4972                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4973                               root->objectid, inode_id, nbytes, extent_size);
4974                 }
4975         }
4976
4977         return err;
4978 }
4979
4980 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4981 {
4982         struct btrfs_path path;
4983         struct btrfs_key key;
4984         int err = 0;
4985         int ret;
4986
4987         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4988         key.type = BTRFS_INODE_ITEM_KEY;
4989         key.offset = 0;
4990
4991         /* For root being dropped, we don't need to check first inode */
4992         if (btrfs_root_refs(&root->root_item) == 0 &&
4993             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4994             key.objectid)
4995                 return 0;
4996
4997         btrfs_init_path(&path);
4998
4999         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5000         if (ret < 0)
5001                 goto out;
5002         if (ret > 0) {
5003                 ret = 0;
5004                 err |= INODE_ITEM_MISSING;
5005                 error("first inode item of root %llu is missing",
5006                       root->objectid);
5007         }
5008
5009         err |= check_inode_item(root, &path, ext_ref);
5010         err &= ~LAST_ITEM;
5011         if (err && !ret)
5012                 ret = -EIO;
5013 out:
5014         btrfs_release_path(&path);
5015         return ret;
5016 }
5017
5018 /*
5019  * Iterate all item on the tree and call check_inode_item() to check.
5020  *
5021  * @root:       the root of the tree to be checked.
5022  * @ext_ref:    the EXTENDED_IREF feature
5023  *
5024  * Return 0 if no error found.
5025  * Return <0 for error.
5026  */
5027 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5028 {
5029         struct btrfs_path path;
5030         struct node_refs nrefs;
5031         struct btrfs_root_item *root_item = &root->root_item;
5032         int ret;
5033         int level;
5034         int err = 0;
5035
5036         /*
5037          * We need to manually check the first inode item(256)
5038          * As the following traversal function will only start from
5039          * the first inode item in the leaf, if inode item(256) is missing
5040          * we will just skip it forever.
5041          */
5042         ret = check_fs_first_inode(root, ext_ref);
5043         if (ret < 0)
5044                 return ret;
5045
5046         memset(&nrefs, 0, sizeof(nrefs));
5047         level = btrfs_header_level(root->node);
5048         btrfs_init_path(&path);
5049
5050         if (btrfs_root_refs(root_item) > 0 ||
5051             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5052                 path.nodes[level] = root->node;
5053                 path.slots[level] = 0;
5054                 extent_buffer_get(root->node);
5055         } else {
5056                 struct btrfs_key key;
5057
5058                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5059                 level = root_item->drop_level;
5060                 path.lowest_level = level;
5061                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5062                 if (ret < 0)
5063                         goto out;
5064                 ret = 0;
5065         }
5066
5067         while (1) {
5068                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5069                 err |= !!ret;
5070
5071                 /* if ret is negative, walk shall stop */
5072                 if (ret < 0) {
5073                         ret = err;
5074                         break;
5075                 }
5076
5077                 ret = walk_up_tree_v2(root, &path, &level);
5078                 if (ret != 0) {
5079                         /* Normal exit, reset ret to err */
5080                         ret = err;
5081                         break;
5082                 }
5083         }
5084
5085 out:
5086         btrfs_release_path(&path);
5087         return ret;
5088 }
5089
5090 /*
5091  * Find the relative ref for root_ref and root_backref.
5092  *
5093  * @root:       the root of the root tree.
5094  * @ref_key:    the key of the root ref.
5095  *
5096  * Return 0 if no error occurred.
5097  */
5098 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5099                           struct extent_buffer *node, int slot)
5100 {
5101         struct btrfs_path path;
5102         struct btrfs_key key;
5103         struct btrfs_root_ref *ref;
5104         struct btrfs_root_ref *backref;
5105         char ref_name[BTRFS_NAME_LEN] = {0};
5106         char backref_name[BTRFS_NAME_LEN] = {0};
5107         u64 ref_dirid;
5108         u64 ref_seq;
5109         u32 ref_namelen;
5110         u64 backref_dirid;
5111         u64 backref_seq;
5112         u32 backref_namelen;
5113         u32 len;
5114         int ret;
5115         int err = 0;
5116
5117         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5118         ref_dirid = btrfs_root_ref_dirid(node, ref);
5119         ref_seq = btrfs_root_ref_sequence(node, ref);
5120         ref_namelen = btrfs_root_ref_name_len(node, ref);
5121
5122         if (ref_namelen <= BTRFS_NAME_LEN) {
5123                 len = ref_namelen;
5124         } else {
5125                 len = BTRFS_NAME_LEN;
5126                 warning("%s[%llu %llu] ref_name too long",
5127                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5128                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5129                         ref_key->offset);
5130         }
5131         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5132
5133         /* Find relative root_ref */
5134         key.objectid = ref_key->offset;
5135         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5136         key.offset = ref_key->objectid;
5137
5138         btrfs_init_path(&path);
5139         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5140         if (ret) {
5141                 err |= ROOT_REF_MISSING;
5142                 error("%s[%llu %llu] couldn't find relative ref",
5143                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5144                       "ROOT_REF" : "ROOT_BACKREF",
5145                       ref_key->objectid, ref_key->offset);
5146                 goto out;
5147         }
5148
5149         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5150                                  struct btrfs_root_ref);
5151         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5152         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5153         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5154
5155         if (backref_namelen <= BTRFS_NAME_LEN) {
5156                 len = backref_namelen;
5157         } else {
5158                 len = BTRFS_NAME_LEN;
5159                 warning("%s[%llu %llu] ref_name too long",
5160                         key.type == BTRFS_ROOT_REF_KEY ?
5161                         "ROOT_REF" : "ROOT_BACKREF",
5162                         key.objectid, key.offset);
5163         }
5164         read_extent_buffer(path.nodes[0], backref_name,
5165                            (unsigned long)(backref + 1), len);
5166
5167         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5168             ref_namelen != backref_namelen ||
5169             strncmp(ref_name, backref_name, len)) {
5170                 err |= ROOT_REF_MISMATCH;
5171                 error("%s[%llu %llu] mismatch relative ref",
5172                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5173                       "ROOT_REF" : "ROOT_BACKREF",
5174                       ref_key->objectid, ref_key->offset);
5175         }
5176 out:
5177         btrfs_release_path(&path);
5178         return err;
5179 }
5180
5181 /*
5182  * Check all fs/file tree in low_memory mode.
5183  *
5184  * 1. for fs tree root item, call check_fs_root_v2()
5185  * 2. for fs tree root ref/backref, call check_root_ref()
5186  *
5187  * Return 0 if no error occurred.
5188  */
5189 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5190 {
5191         struct btrfs_root *tree_root = fs_info->tree_root;
5192         struct btrfs_root *cur_root = NULL;
5193         struct btrfs_path path;
5194         struct btrfs_key key;
5195         struct extent_buffer *node;
5196         unsigned int ext_ref;
5197         int slot;
5198         int ret;
5199         int err = 0;
5200
5201         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5202
5203         btrfs_init_path(&path);
5204         key.objectid = BTRFS_FS_TREE_OBJECTID;
5205         key.offset = 0;
5206         key.type = BTRFS_ROOT_ITEM_KEY;
5207
5208         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5209         if (ret < 0) {
5210                 err = ret;
5211                 goto out;
5212         } else if (ret > 0) {
5213                 err = -ENOENT;
5214                 goto out;
5215         }
5216
5217         while (1) {
5218                 node = path.nodes[0];
5219                 slot = path.slots[0];
5220                 btrfs_item_key_to_cpu(node, &key, slot);
5221                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5222                         goto out;
5223                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5224                     fs_root_objectid(key.objectid)) {
5225                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5226                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5227                                                                        &key);
5228                         } else {
5229                                 key.offset = (u64)-1;
5230                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5231                         }
5232
5233                         if (IS_ERR(cur_root)) {
5234                                 error("Fail to read fs/subvol tree: %lld",
5235                                       key.objectid);
5236                                 err = -EIO;
5237                                 goto next;
5238                         }
5239
5240                         ret = check_fs_root_v2(cur_root, ext_ref);
5241                         err |= ret;
5242
5243                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5244                                 btrfs_free_fs_root(cur_root);
5245                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5246                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5247                         ret = check_root_ref(tree_root, &key, node, slot);
5248                         err |= ret;
5249                 }
5250 next:
5251                 ret = btrfs_next_item(tree_root, &path);
5252                 if (ret > 0)
5253                         goto out;
5254                 if (ret < 0) {
5255                         err = ret;
5256                         goto out;
5257                 }
5258         }
5259
5260 out:
5261         btrfs_release_path(&path);
5262         return err;
5263 }
5264
5265 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5266 {
5267         struct list_head *cur = rec->backrefs.next;
5268         struct extent_backref *back;
5269         struct tree_backref *tback;
5270         struct data_backref *dback;
5271         u64 found = 0;
5272         int err = 0;
5273
5274         while(cur != &rec->backrefs) {
5275                 back = to_extent_backref(cur);
5276                 cur = cur->next;
5277                 if (!back->found_extent_tree) {
5278                         err = 1;
5279                         if (!print_errs)
5280                                 goto out;
5281                         if (back->is_data) {
5282                                 dback = to_data_backref(back);
5283                                 fprintf(stderr, "Backref %llu %s %llu"
5284                                         " owner %llu offset %llu num_refs %lu"
5285                                         " not found in extent tree\n",
5286                                         (unsigned long long)rec->start,
5287                                         back->full_backref ?
5288                                         "parent" : "root",
5289                                         back->full_backref ?
5290                                         (unsigned long long)dback->parent:
5291                                         (unsigned long long)dback->root,
5292                                         (unsigned long long)dback->owner,
5293                                         (unsigned long long)dback->offset,
5294                                         (unsigned long)dback->num_refs);
5295                         } else {
5296                                 tback = to_tree_backref(back);
5297                                 fprintf(stderr, "Backref %llu parent %llu"
5298                                         " root %llu not found in extent tree\n",
5299                                         (unsigned long long)rec->start,
5300                                         (unsigned long long)tback->parent,
5301                                         (unsigned long long)tback->root);
5302                         }
5303                 }
5304                 if (!back->is_data && !back->found_ref) {
5305                         err = 1;
5306                         if (!print_errs)
5307                                 goto out;
5308                         tback = to_tree_backref(back);
5309                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5310                                 (unsigned long long)rec->start,
5311                                 back->full_backref ? "parent" : "root",
5312                                 back->full_backref ?
5313                                 (unsigned long long)tback->parent :
5314                                 (unsigned long long)tback->root, back);
5315                 }
5316                 if (back->is_data) {
5317                         dback = to_data_backref(back);
5318                         if (dback->found_ref != dback->num_refs) {
5319                                 err = 1;
5320                                 if (!print_errs)
5321                                         goto out;
5322                                 fprintf(stderr, "Incorrect local backref count"
5323                                         " on %llu %s %llu owner %llu"
5324                                         " offset %llu found %u wanted %u back %p\n",
5325                                         (unsigned long long)rec->start,
5326                                         back->full_backref ?
5327                                         "parent" : "root",
5328                                         back->full_backref ?
5329                                         (unsigned long long)dback->parent:
5330                                         (unsigned long long)dback->root,
5331                                         (unsigned long long)dback->owner,
5332                                         (unsigned long long)dback->offset,
5333                                         dback->found_ref, dback->num_refs, back);
5334                         }
5335                         if (dback->disk_bytenr != rec->start) {
5336                                 err = 1;
5337                                 if (!print_errs)
5338                                         goto out;
5339                                 fprintf(stderr, "Backref disk bytenr does not"
5340                                         " match extent record, bytenr=%llu, "
5341                                         "ref bytenr=%llu\n",
5342                                         (unsigned long long)rec->start,
5343                                         (unsigned long long)dback->disk_bytenr);
5344                         }
5345
5346                         if (dback->bytes != rec->nr) {
5347                                 err = 1;
5348                                 if (!print_errs)
5349                                         goto out;
5350                                 fprintf(stderr, "Backref bytes do not match "
5351                                         "extent backref, bytenr=%llu, ref "
5352                                         "bytes=%llu, backref bytes=%llu\n",
5353                                         (unsigned long long)rec->start,
5354                                         (unsigned long long)rec->nr,
5355                                         (unsigned long long)dback->bytes);
5356                         }
5357                 }
5358                 if (!back->is_data) {
5359                         found += 1;
5360                 } else {
5361                         dback = to_data_backref(back);
5362                         found += dback->found_ref;
5363                 }
5364         }
5365         if (found != rec->refs) {
5366                 err = 1;
5367                 if (!print_errs)
5368                         goto out;
5369                 fprintf(stderr, "Incorrect global backref count "
5370                         "on %llu found %llu wanted %llu\n",
5371                         (unsigned long long)rec->start,
5372                         (unsigned long long)found,
5373                         (unsigned long long)rec->refs);
5374         }
5375 out:
5376         return err;
5377 }
5378
5379 static int free_all_extent_backrefs(struct extent_record *rec)
5380 {
5381         struct extent_backref *back;
5382         struct list_head *cur;
5383         while (!list_empty(&rec->backrefs)) {
5384                 cur = rec->backrefs.next;
5385                 back = to_extent_backref(cur);
5386                 list_del(cur);
5387                 free(back);
5388         }
5389         return 0;
5390 }
5391
5392 static void free_extent_record_cache(struct cache_tree *extent_cache)
5393 {
5394         struct cache_extent *cache;
5395         struct extent_record *rec;
5396
5397         while (1) {
5398                 cache = first_cache_extent(extent_cache);
5399                 if (!cache)
5400                         break;
5401                 rec = container_of(cache, struct extent_record, cache);
5402                 remove_cache_extent(extent_cache, cache);
5403                 free_all_extent_backrefs(rec);
5404                 free(rec);
5405         }
5406 }
5407
5408 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5409                                  struct extent_record *rec)
5410 {
5411         if (rec->content_checked && rec->owner_ref_checked &&
5412             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5413             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5414             !rec->bad_full_backref && !rec->crossing_stripes &&
5415             !rec->wrong_chunk_type) {
5416                 remove_cache_extent(extent_cache, &rec->cache);
5417                 free_all_extent_backrefs(rec);
5418                 list_del_init(&rec->list);
5419                 free(rec);
5420         }
5421         return 0;
5422 }
5423
5424 static int check_owner_ref(struct btrfs_root *root,
5425                             struct extent_record *rec,
5426                             struct extent_buffer *buf)
5427 {
5428         struct extent_backref *node;
5429         struct tree_backref *back;
5430         struct btrfs_root *ref_root;
5431         struct btrfs_key key;
5432         struct btrfs_path path;
5433         struct extent_buffer *parent;
5434         int level;
5435         int found = 0;
5436         int ret;
5437
5438         list_for_each_entry(node, &rec->backrefs, list) {
5439                 if (node->is_data)
5440                         continue;
5441                 if (!node->found_ref)
5442                         continue;
5443                 if (node->full_backref)
5444                         continue;
5445                 back = to_tree_backref(node);
5446                 if (btrfs_header_owner(buf) == back->root)
5447                         return 0;
5448         }
5449         BUG_ON(rec->is_root);
5450
5451         /* try to find the block by search corresponding fs tree */
5452         key.objectid = btrfs_header_owner(buf);
5453         key.type = BTRFS_ROOT_ITEM_KEY;
5454         key.offset = (u64)-1;
5455
5456         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5457         if (IS_ERR(ref_root))
5458                 return 1;
5459
5460         level = btrfs_header_level(buf);
5461         if (level == 0)
5462                 btrfs_item_key_to_cpu(buf, &key, 0);
5463         else
5464                 btrfs_node_key_to_cpu(buf, &key, 0);
5465
5466         btrfs_init_path(&path);
5467         path.lowest_level = level + 1;
5468         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5469         if (ret < 0)
5470                 return 0;
5471
5472         parent = path.nodes[level + 1];
5473         if (parent && buf->start == btrfs_node_blockptr(parent,
5474                                                         path.slots[level + 1]))
5475                 found = 1;
5476
5477         btrfs_release_path(&path);
5478         return found ? 0 : 1;
5479 }
5480
5481 static int is_extent_tree_record(struct extent_record *rec)
5482 {
5483         struct list_head *cur = rec->backrefs.next;
5484         struct extent_backref *node;
5485         struct tree_backref *back;
5486         int is_extent = 0;
5487
5488         while(cur != &rec->backrefs) {
5489                 node = to_extent_backref(cur);
5490                 cur = cur->next;
5491                 if (node->is_data)
5492                         return 0;
5493                 back = to_tree_backref(node);
5494                 if (node->full_backref)
5495                         return 0;
5496                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5497                         is_extent = 1;
5498         }
5499         return is_extent;
5500 }
5501
5502
5503 static int record_bad_block_io(struct btrfs_fs_info *info,
5504                                struct cache_tree *extent_cache,
5505                                u64 start, u64 len)
5506 {
5507         struct extent_record *rec;
5508         struct cache_extent *cache;
5509         struct btrfs_key key;
5510
5511         cache = lookup_cache_extent(extent_cache, start, len);
5512         if (!cache)
5513                 return 0;
5514
5515         rec = container_of(cache, struct extent_record, cache);
5516         if (!is_extent_tree_record(rec))
5517                 return 0;
5518
5519         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5520         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5521 }
5522
5523 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5524                        struct extent_buffer *buf, int slot)
5525 {
5526         if (btrfs_header_level(buf)) {
5527                 struct btrfs_key_ptr ptr1, ptr2;
5528
5529                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5530                                    sizeof(struct btrfs_key_ptr));
5531                 read_extent_buffer(buf, &ptr2,
5532                                    btrfs_node_key_ptr_offset(slot + 1),
5533                                    sizeof(struct btrfs_key_ptr));
5534                 write_extent_buffer(buf, &ptr1,
5535                                     btrfs_node_key_ptr_offset(slot + 1),
5536                                     sizeof(struct btrfs_key_ptr));
5537                 write_extent_buffer(buf, &ptr2,
5538                                     btrfs_node_key_ptr_offset(slot),
5539                                     sizeof(struct btrfs_key_ptr));
5540                 if (slot == 0) {
5541                         struct btrfs_disk_key key;
5542                         btrfs_node_key(buf, &key, 0);
5543                         btrfs_fixup_low_keys(root, path, &key,
5544                                              btrfs_header_level(buf) + 1);
5545                 }
5546         } else {
5547                 struct btrfs_item *item1, *item2;
5548                 struct btrfs_key k1, k2;
5549                 char *item1_data, *item2_data;
5550                 u32 item1_offset, item2_offset, item1_size, item2_size;
5551
5552                 item1 = btrfs_item_nr(slot);
5553                 item2 = btrfs_item_nr(slot + 1);
5554                 btrfs_item_key_to_cpu(buf, &k1, slot);
5555                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5556                 item1_offset = btrfs_item_offset(buf, item1);
5557                 item2_offset = btrfs_item_offset(buf, item2);
5558                 item1_size = btrfs_item_size(buf, item1);
5559                 item2_size = btrfs_item_size(buf, item2);
5560
5561                 item1_data = malloc(item1_size);
5562                 if (!item1_data)
5563                         return -ENOMEM;
5564                 item2_data = malloc(item2_size);
5565                 if (!item2_data) {
5566                         free(item1_data);
5567                         return -ENOMEM;
5568                 }
5569
5570                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5571                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5572
5573                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5574                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5575                 free(item1_data);
5576                 free(item2_data);
5577
5578                 btrfs_set_item_offset(buf, item1, item2_offset);
5579                 btrfs_set_item_offset(buf, item2, item1_offset);
5580                 btrfs_set_item_size(buf, item1, item2_size);
5581                 btrfs_set_item_size(buf, item2, item1_size);
5582
5583                 path->slots[0] = slot;
5584                 btrfs_set_item_key_unsafe(root, path, &k2);
5585                 path->slots[0] = slot + 1;
5586                 btrfs_set_item_key_unsafe(root, path, &k1);
5587         }
5588         return 0;
5589 }
5590
5591 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5592 {
5593         struct extent_buffer *buf;
5594         struct btrfs_key k1, k2;
5595         int i;
5596         int level = path->lowest_level;
5597         int ret = -EIO;
5598
5599         buf = path->nodes[level];
5600         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5601                 if (level) {
5602                         btrfs_node_key_to_cpu(buf, &k1, i);
5603                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5604                 } else {
5605                         btrfs_item_key_to_cpu(buf, &k1, i);
5606                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5607                 }
5608                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5609                         continue;
5610                 ret = swap_values(root, path, buf, i);
5611                 if (ret)
5612                         break;
5613                 btrfs_mark_buffer_dirty(buf);
5614                 i = 0;
5615         }
5616         return ret;
5617 }
5618
5619 static int delete_bogus_item(struct btrfs_root *root,
5620                              struct btrfs_path *path,
5621                              struct extent_buffer *buf, int slot)
5622 {
5623         struct btrfs_key key;
5624         int nritems = btrfs_header_nritems(buf);
5625
5626         btrfs_item_key_to_cpu(buf, &key, slot);
5627
5628         /* These are all the keys we can deal with missing. */
5629         if (key.type != BTRFS_DIR_INDEX_KEY &&
5630             key.type != BTRFS_EXTENT_ITEM_KEY &&
5631             key.type != BTRFS_METADATA_ITEM_KEY &&
5632             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5633             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5634                 return -1;
5635
5636         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5637                (unsigned long long)key.objectid, key.type,
5638                (unsigned long long)key.offset, slot, buf->start);
5639         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5640                               btrfs_item_nr_offset(slot + 1),
5641                               sizeof(struct btrfs_item) *
5642                               (nritems - slot - 1));
5643         btrfs_set_header_nritems(buf, nritems - 1);
5644         if (slot == 0) {
5645                 struct btrfs_disk_key disk_key;
5646
5647                 btrfs_item_key(buf, &disk_key, 0);
5648                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5649         }
5650         btrfs_mark_buffer_dirty(buf);
5651         return 0;
5652 }
5653
5654 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5655 {
5656         struct extent_buffer *buf;
5657         int i;
5658         int ret = 0;
5659
5660         /* We should only get this for leaves */
5661         BUG_ON(path->lowest_level);
5662         buf = path->nodes[0];
5663 again:
5664         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5665                 unsigned int shift = 0, offset;
5666
5667                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5668                     BTRFS_LEAF_DATA_SIZE(root)) {
5669                         if (btrfs_item_end_nr(buf, i) >
5670                             BTRFS_LEAF_DATA_SIZE(root)) {
5671                                 ret = delete_bogus_item(root, path, buf, i);
5672                                 if (!ret)
5673                                         goto again;
5674                                 fprintf(stderr, "item is off the end of the "
5675                                         "leaf, can't fix\n");
5676                                 ret = -EIO;
5677                                 break;
5678                         }
5679                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5680                                 btrfs_item_end_nr(buf, i);
5681                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5682                            btrfs_item_offset_nr(buf, i - 1)) {
5683                         if (btrfs_item_end_nr(buf, i) >
5684                             btrfs_item_offset_nr(buf, i - 1)) {
5685                                 ret = delete_bogus_item(root, path, buf, i);
5686                                 if (!ret)
5687                                         goto again;
5688                                 fprintf(stderr, "items overlap, can't fix\n");
5689                                 ret = -EIO;
5690                                 break;
5691                         }
5692                         shift = btrfs_item_offset_nr(buf, i - 1) -
5693                                 btrfs_item_end_nr(buf, i);
5694                 }
5695                 if (!shift)
5696                         continue;
5697
5698                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5699                        i, shift, (unsigned long long)buf->start);
5700                 offset = btrfs_item_offset_nr(buf, i);
5701                 memmove_extent_buffer(buf,
5702                                       btrfs_leaf_data(buf) + offset + shift,
5703                                       btrfs_leaf_data(buf) + offset,
5704                                       btrfs_item_size_nr(buf, i));
5705                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5706                                       offset + shift);
5707                 btrfs_mark_buffer_dirty(buf);
5708         }
5709
5710         /*
5711          * We may have moved things, in which case we want to exit so we don't
5712          * write those changes out.  Once we have proper abort functionality in
5713          * progs this can be changed to something nicer.
5714          */
5715         BUG_ON(ret);
5716         return ret;
5717 }
5718
5719 /*
5720  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5721  * then just return -EIO.
5722  */
5723 static int try_to_fix_bad_block(struct btrfs_root *root,
5724                                 struct extent_buffer *buf,
5725                                 enum btrfs_tree_block_status status)
5726 {
5727         struct btrfs_trans_handle *trans;
5728         struct ulist *roots;
5729         struct ulist_node *node;
5730         struct btrfs_root *search_root;
5731         struct btrfs_path path;
5732         struct ulist_iterator iter;
5733         struct btrfs_key root_key, key;
5734         int ret;
5735
5736         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5737             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5738                 return -EIO;
5739
5740         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5741         if (ret)
5742                 return -EIO;
5743
5744         btrfs_init_path(&path);
5745         ULIST_ITER_INIT(&iter);
5746         while ((node = ulist_next(roots, &iter))) {
5747                 root_key.objectid = node->val;
5748                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5749                 root_key.offset = (u64)-1;
5750
5751                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5752                 if (IS_ERR(root)) {
5753                         ret = -EIO;
5754                         break;
5755                 }
5756
5757
5758                 trans = btrfs_start_transaction(search_root, 0);
5759                 if (IS_ERR(trans)) {
5760                         ret = PTR_ERR(trans);
5761                         break;
5762                 }
5763
5764                 path.lowest_level = btrfs_header_level(buf);
5765                 path.skip_check_block = 1;
5766                 if (path.lowest_level)
5767                         btrfs_node_key_to_cpu(buf, &key, 0);
5768                 else
5769                         btrfs_item_key_to_cpu(buf, &key, 0);
5770                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5771                 if (ret) {
5772                         ret = -EIO;
5773                         btrfs_commit_transaction(trans, search_root);
5774                         break;
5775                 }
5776                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5777                         ret = fix_key_order(search_root, &path);
5778                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5779                         ret = fix_item_offset(search_root, &path);
5780                 if (ret) {
5781                         btrfs_commit_transaction(trans, search_root);
5782                         break;
5783                 }
5784                 btrfs_release_path(&path);
5785                 btrfs_commit_transaction(trans, search_root);
5786         }
5787         ulist_free(roots);
5788         btrfs_release_path(&path);
5789         return ret;
5790 }
5791
5792 static int check_block(struct btrfs_root *root,
5793                        struct cache_tree *extent_cache,
5794                        struct extent_buffer *buf, u64 flags)
5795 {
5796         struct extent_record *rec;
5797         struct cache_extent *cache;
5798         struct btrfs_key key;
5799         enum btrfs_tree_block_status status;
5800         int ret = 0;
5801         int level;
5802
5803         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5804         if (!cache)
5805                 return 1;
5806         rec = container_of(cache, struct extent_record, cache);
5807         rec->generation = btrfs_header_generation(buf);
5808
5809         level = btrfs_header_level(buf);
5810         if (btrfs_header_nritems(buf) > 0) {
5811
5812                 if (level == 0)
5813                         btrfs_item_key_to_cpu(buf, &key, 0);
5814                 else
5815                         btrfs_node_key_to_cpu(buf, &key, 0);
5816
5817                 rec->info_objectid = key.objectid;
5818         }
5819         rec->info_level = level;
5820
5821         if (btrfs_is_leaf(buf))
5822                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5823         else
5824                 status = btrfs_check_node(root, &rec->parent_key, buf);
5825
5826         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5827                 if (repair)
5828                         status = try_to_fix_bad_block(root, buf, status);
5829                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5830                         ret = -EIO;
5831                         fprintf(stderr, "bad block %llu\n",
5832                                 (unsigned long long)buf->start);
5833                 } else {
5834                         /*
5835                          * Signal to callers we need to start the scan over
5836                          * again since we'll have cowed blocks.
5837                          */
5838                         ret = -EAGAIN;
5839                 }
5840         } else {
5841                 rec->content_checked = 1;
5842                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5843                         rec->owner_ref_checked = 1;
5844                 else {
5845                         ret = check_owner_ref(root, rec, buf);
5846                         if (!ret)
5847                                 rec->owner_ref_checked = 1;
5848                 }
5849         }
5850         if (!ret)
5851                 maybe_free_extent_rec(extent_cache, rec);
5852         return ret;
5853 }
5854
5855 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5856                                                 u64 parent, u64 root)
5857 {
5858         struct list_head *cur = rec->backrefs.next;
5859         struct extent_backref *node;
5860         struct tree_backref *back;
5861
5862         while(cur != &rec->backrefs) {
5863                 node = to_extent_backref(cur);
5864                 cur = cur->next;
5865                 if (node->is_data)
5866                         continue;
5867                 back = to_tree_backref(node);
5868                 if (parent > 0) {
5869                         if (!node->full_backref)
5870                                 continue;
5871                         if (parent == back->parent)
5872                                 return back;
5873                 } else {
5874                         if (node->full_backref)
5875                                 continue;
5876                         if (back->root == root)
5877                                 return back;
5878                 }
5879         }
5880         return NULL;
5881 }
5882
5883 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5884                                                 u64 parent, u64 root)
5885 {
5886         struct tree_backref *ref = malloc(sizeof(*ref));
5887
5888         if (!ref)
5889                 return NULL;
5890         memset(&ref->node, 0, sizeof(ref->node));
5891         if (parent > 0) {
5892                 ref->parent = parent;
5893                 ref->node.full_backref = 1;
5894         } else {
5895                 ref->root = root;
5896                 ref->node.full_backref = 0;
5897         }
5898         list_add_tail(&ref->node.list, &rec->backrefs);
5899
5900         return ref;
5901 }
5902
5903 static struct data_backref *find_data_backref(struct extent_record *rec,
5904                                                 u64 parent, u64 root,
5905                                                 u64 owner, u64 offset,
5906                                                 int found_ref,
5907                                                 u64 disk_bytenr, u64 bytes)
5908 {
5909         struct list_head *cur = rec->backrefs.next;
5910         struct extent_backref *node;
5911         struct data_backref *back;
5912
5913         while(cur != &rec->backrefs) {
5914                 node = to_extent_backref(cur);
5915                 cur = cur->next;
5916                 if (!node->is_data)
5917                         continue;
5918                 back = to_data_backref(node);
5919                 if (parent > 0) {
5920                         if (!node->full_backref)
5921                                 continue;
5922                         if (parent == back->parent)
5923                                 return back;
5924                 } else {
5925                         if (node->full_backref)
5926                                 continue;
5927                         if (back->root == root && back->owner == owner &&
5928                             back->offset == offset) {
5929                                 if (found_ref && node->found_ref &&
5930                                     (back->bytes != bytes ||
5931                                     back->disk_bytenr != disk_bytenr))
5932                                         continue;
5933                                 return back;
5934                         }
5935                 }
5936         }
5937         return NULL;
5938 }
5939
5940 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5941                                                 u64 parent, u64 root,
5942                                                 u64 owner, u64 offset,
5943                                                 u64 max_size)
5944 {
5945         struct data_backref *ref = malloc(sizeof(*ref));
5946
5947         if (!ref)
5948                 return NULL;
5949         memset(&ref->node, 0, sizeof(ref->node));
5950         ref->node.is_data = 1;
5951
5952         if (parent > 0) {
5953                 ref->parent = parent;
5954                 ref->owner = 0;
5955                 ref->offset = 0;
5956                 ref->node.full_backref = 1;
5957         } else {
5958                 ref->root = root;
5959                 ref->owner = owner;
5960                 ref->offset = offset;
5961                 ref->node.full_backref = 0;
5962         }
5963         ref->bytes = max_size;
5964         ref->found_ref = 0;
5965         ref->num_refs = 0;
5966         list_add_tail(&ref->node.list, &rec->backrefs);
5967         if (max_size > rec->max_size)
5968                 rec->max_size = max_size;
5969         return ref;
5970 }
5971
5972 /* Check if the type of extent matches with its chunk */
5973 static void check_extent_type(struct extent_record *rec)
5974 {
5975         struct btrfs_block_group_cache *bg_cache;
5976
5977         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5978         if (!bg_cache)
5979                 return;
5980
5981         /* data extent, check chunk directly*/
5982         if (!rec->metadata) {
5983                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5984                         rec->wrong_chunk_type = 1;
5985                 return;
5986         }
5987
5988         /* metadata extent, check the obvious case first */
5989         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5990                                  BTRFS_BLOCK_GROUP_METADATA))) {
5991                 rec->wrong_chunk_type = 1;
5992                 return;
5993         }
5994
5995         /*
5996          * Check SYSTEM extent, as it's also marked as metadata, we can only
5997          * make sure it's a SYSTEM extent by its backref
5998          */
5999         if (!list_empty(&rec->backrefs)) {
6000                 struct extent_backref *node;
6001                 struct tree_backref *tback;
6002                 u64 bg_type;
6003
6004                 node = to_extent_backref(rec->backrefs.next);
6005                 if (node->is_data) {
6006                         /* tree block shouldn't have data backref */
6007                         rec->wrong_chunk_type = 1;
6008                         return;
6009                 }
6010                 tback = container_of(node, struct tree_backref, node);
6011
6012                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6013                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6014                 else
6015                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6016                 if (!(bg_cache->flags & bg_type))
6017                         rec->wrong_chunk_type = 1;
6018         }
6019 }
6020
6021 /*
6022  * Allocate a new extent record, fill default values from @tmpl and insert int
6023  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6024  * the cache, otherwise it fails.
6025  */
6026 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6027                 struct extent_record *tmpl)
6028 {
6029         struct extent_record *rec;
6030         int ret = 0;
6031
6032         rec = malloc(sizeof(*rec));
6033         if (!rec)
6034                 return -ENOMEM;
6035         rec->start = tmpl->start;
6036         rec->max_size = tmpl->max_size;
6037         rec->nr = max(tmpl->nr, tmpl->max_size);
6038         rec->found_rec = tmpl->found_rec;
6039         rec->content_checked = tmpl->content_checked;
6040         rec->owner_ref_checked = tmpl->owner_ref_checked;
6041         rec->num_duplicates = 0;
6042         rec->metadata = tmpl->metadata;
6043         rec->flag_block_full_backref = FLAG_UNSET;
6044         rec->bad_full_backref = 0;
6045         rec->crossing_stripes = 0;
6046         rec->wrong_chunk_type = 0;
6047         rec->is_root = tmpl->is_root;
6048         rec->refs = tmpl->refs;
6049         rec->extent_item_refs = tmpl->extent_item_refs;
6050         rec->parent_generation = tmpl->parent_generation;
6051         INIT_LIST_HEAD(&rec->backrefs);
6052         INIT_LIST_HEAD(&rec->dups);
6053         INIT_LIST_HEAD(&rec->list);
6054         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6055         rec->cache.start = tmpl->start;
6056         rec->cache.size = tmpl->nr;
6057         ret = insert_cache_extent(extent_cache, &rec->cache);
6058         if (ret) {
6059                 free(rec);
6060                 return ret;
6061         }
6062         bytes_used += rec->nr;
6063
6064         if (tmpl->metadata)
6065                 rec->crossing_stripes = check_crossing_stripes(global_info,
6066                                 rec->start, global_info->tree_root->nodesize);
6067         check_extent_type(rec);
6068         return ret;
6069 }
6070
6071 /*
6072  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6073  * some are hints:
6074  * - refs              - if found, increase refs
6075  * - is_root           - if found, set
6076  * - content_checked   - if found, set
6077  * - owner_ref_checked - if found, set
6078  *
6079  * If not found, create a new one, initialize and insert.
6080  */
6081 static int add_extent_rec(struct cache_tree *extent_cache,
6082                 struct extent_record *tmpl)
6083 {
6084         struct extent_record *rec;
6085         struct cache_extent *cache;
6086         int ret = 0;
6087         int dup = 0;
6088
6089         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6090         if (cache) {
6091                 rec = container_of(cache, struct extent_record, cache);
6092                 if (tmpl->refs)
6093                         rec->refs++;
6094                 if (rec->nr == 1)
6095                         rec->nr = max(tmpl->nr, tmpl->max_size);
6096
6097                 /*
6098                  * We need to make sure to reset nr to whatever the extent
6099                  * record says was the real size, this way we can compare it to
6100                  * the backrefs.
6101                  */
6102                 if (tmpl->found_rec) {
6103                         if (tmpl->start != rec->start || rec->found_rec) {
6104                                 struct extent_record *tmp;
6105
6106                                 dup = 1;
6107                                 if (list_empty(&rec->list))
6108                                         list_add_tail(&rec->list,
6109                                                       &duplicate_extents);
6110
6111                                 /*
6112                                  * We have to do this song and dance in case we
6113                                  * find an extent record that falls inside of
6114                                  * our current extent record but does not have
6115                                  * the same objectid.
6116                                  */
6117                                 tmp = malloc(sizeof(*tmp));
6118                                 if (!tmp)
6119                                         return -ENOMEM;
6120                                 tmp->start = tmpl->start;
6121                                 tmp->max_size = tmpl->max_size;
6122                                 tmp->nr = tmpl->nr;
6123                                 tmp->found_rec = 1;
6124                                 tmp->metadata = tmpl->metadata;
6125                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6126                                 INIT_LIST_HEAD(&tmp->list);
6127                                 list_add_tail(&tmp->list, &rec->dups);
6128                                 rec->num_duplicates++;
6129                         } else {
6130                                 rec->nr = tmpl->nr;
6131                                 rec->found_rec = 1;
6132                         }
6133                 }
6134
6135                 if (tmpl->extent_item_refs && !dup) {
6136                         if (rec->extent_item_refs) {
6137                                 fprintf(stderr, "block %llu rec "
6138                                         "extent_item_refs %llu, passed %llu\n",
6139                                         (unsigned long long)tmpl->start,
6140                                         (unsigned long long)
6141                                                         rec->extent_item_refs,
6142                                         (unsigned long long)tmpl->extent_item_refs);
6143                         }
6144                         rec->extent_item_refs = tmpl->extent_item_refs;
6145                 }
6146                 if (tmpl->is_root)
6147                         rec->is_root = 1;
6148                 if (tmpl->content_checked)
6149                         rec->content_checked = 1;
6150                 if (tmpl->owner_ref_checked)
6151                         rec->owner_ref_checked = 1;
6152                 memcpy(&rec->parent_key, &tmpl->parent_key,
6153                                 sizeof(tmpl->parent_key));
6154                 if (tmpl->parent_generation)
6155                         rec->parent_generation = tmpl->parent_generation;
6156                 if (rec->max_size < tmpl->max_size)
6157                         rec->max_size = tmpl->max_size;
6158
6159                 /*
6160                  * A metadata extent can't cross stripe_len boundary, otherwise
6161                  * kernel scrub won't be able to handle it.
6162                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6163                  * it.
6164                  */
6165                 if (tmpl->metadata)
6166                         rec->crossing_stripes = check_crossing_stripes(
6167                                         global_info, rec->start,
6168                                         global_info->tree_root->nodesize);
6169                 check_extent_type(rec);
6170                 maybe_free_extent_rec(extent_cache, rec);
6171                 return ret;
6172         }
6173
6174         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6175
6176         return ret;
6177 }
6178
6179 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6180                             u64 parent, u64 root, int found_ref)
6181 {
6182         struct extent_record *rec;
6183         struct tree_backref *back;
6184         struct cache_extent *cache;
6185         int ret;
6186
6187         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6188         if (!cache) {
6189                 struct extent_record tmpl;
6190
6191                 memset(&tmpl, 0, sizeof(tmpl));
6192                 tmpl.start = bytenr;
6193                 tmpl.nr = 1;
6194                 tmpl.metadata = 1;
6195
6196                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6197                 if (ret)
6198                         return ret;
6199
6200                 /* really a bug in cache_extent implement now */
6201                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6202                 if (!cache)
6203                         return -ENOENT;
6204         }
6205
6206         rec = container_of(cache, struct extent_record, cache);
6207         if (rec->start != bytenr) {
6208                 /*
6209                  * Several cause, from unaligned bytenr to over lapping extents
6210                  */
6211                 return -EEXIST;
6212         }
6213
6214         back = find_tree_backref(rec, parent, root);
6215         if (!back) {
6216                 back = alloc_tree_backref(rec, parent, root);
6217                 if (!back)
6218                         return -ENOMEM;
6219         }
6220
6221         if (found_ref) {
6222                 if (back->node.found_ref) {
6223                         fprintf(stderr, "Extent back ref already exists "
6224                                 "for %llu parent %llu root %llu \n",
6225                                 (unsigned long long)bytenr,
6226                                 (unsigned long long)parent,
6227                                 (unsigned long long)root);
6228                 }
6229                 back->node.found_ref = 1;
6230         } else {
6231                 if (back->node.found_extent_tree) {
6232                         fprintf(stderr, "Extent back ref already exists "
6233                                 "for %llu parent %llu root %llu \n",
6234                                 (unsigned long long)bytenr,
6235                                 (unsigned long long)parent,
6236                                 (unsigned long long)root);
6237                 }
6238                 back->node.found_extent_tree = 1;
6239         }
6240         check_extent_type(rec);
6241         maybe_free_extent_rec(extent_cache, rec);
6242         return 0;
6243 }
6244
6245 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6246                             u64 parent, u64 root, u64 owner, u64 offset,
6247                             u32 num_refs, int found_ref, u64 max_size)
6248 {
6249         struct extent_record *rec;
6250         struct data_backref *back;
6251         struct cache_extent *cache;
6252         int ret;
6253
6254         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6255         if (!cache) {
6256                 struct extent_record tmpl;
6257
6258                 memset(&tmpl, 0, sizeof(tmpl));
6259                 tmpl.start = bytenr;
6260                 tmpl.nr = 1;
6261                 tmpl.max_size = max_size;
6262
6263                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6264                 if (ret)
6265                         return ret;
6266
6267                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6268                 if (!cache)
6269                         abort();
6270         }
6271
6272         rec = container_of(cache, struct extent_record, cache);
6273         if (rec->max_size < max_size)
6274                 rec->max_size = max_size;
6275
6276         /*
6277          * If found_ref is set then max_size is the real size and must match the
6278          * existing refs.  So if we have already found a ref then we need to
6279          * make sure that this ref matches the existing one, otherwise we need
6280          * to add a new backref so we can notice that the backrefs don't match
6281          * and we need to figure out who is telling the truth.  This is to
6282          * account for that awful fsync bug I introduced where we'd end up with
6283          * a btrfs_file_extent_item that would have its length include multiple
6284          * prealloc extents or point inside of a prealloc extent.
6285          */
6286         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6287                                  bytenr, max_size);
6288         if (!back) {
6289                 back = alloc_data_backref(rec, parent, root, owner, offset,
6290                                           max_size);
6291                 BUG_ON(!back);
6292         }
6293
6294         if (found_ref) {
6295                 BUG_ON(num_refs != 1);
6296                 if (back->node.found_ref)
6297                         BUG_ON(back->bytes != max_size);
6298                 back->node.found_ref = 1;
6299                 back->found_ref += 1;
6300                 back->bytes = max_size;
6301                 back->disk_bytenr = bytenr;
6302                 rec->refs += 1;
6303                 rec->content_checked = 1;
6304                 rec->owner_ref_checked = 1;
6305         } else {
6306                 if (back->node.found_extent_tree) {
6307                         fprintf(stderr, "Extent back ref already exists "
6308                                 "for %llu parent %llu root %llu "
6309                                 "owner %llu offset %llu num_refs %lu\n",
6310                                 (unsigned long long)bytenr,
6311                                 (unsigned long long)parent,
6312                                 (unsigned long long)root,
6313                                 (unsigned long long)owner,
6314                                 (unsigned long long)offset,
6315                                 (unsigned long)num_refs);
6316                 }
6317                 back->num_refs = num_refs;
6318                 back->node.found_extent_tree = 1;
6319         }
6320         maybe_free_extent_rec(extent_cache, rec);
6321         return 0;
6322 }
6323
6324 static int add_pending(struct cache_tree *pending,
6325                        struct cache_tree *seen, u64 bytenr, u32 size)
6326 {
6327         int ret;
6328         ret = add_cache_extent(seen, bytenr, size);
6329         if (ret)
6330                 return ret;
6331         add_cache_extent(pending, bytenr, size);
6332         return 0;
6333 }
6334
6335 static int pick_next_pending(struct cache_tree *pending,
6336                         struct cache_tree *reada,
6337                         struct cache_tree *nodes,
6338                         u64 last, struct block_info *bits, int bits_nr,
6339                         int *reada_bits)
6340 {
6341         unsigned long node_start = last;
6342         struct cache_extent *cache;
6343         int ret;
6344
6345         cache = search_cache_extent(reada, 0);
6346         if (cache) {
6347                 bits[0].start = cache->start;
6348                 bits[0].size = cache->size;
6349                 *reada_bits = 1;
6350                 return 1;
6351         }
6352         *reada_bits = 0;
6353         if (node_start > 32768)
6354                 node_start -= 32768;
6355
6356         cache = search_cache_extent(nodes, node_start);
6357         if (!cache)
6358                 cache = search_cache_extent(nodes, 0);
6359
6360         if (!cache) {
6361                  cache = search_cache_extent(pending, 0);
6362                  if (!cache)
6363                          return 0;
6364                  ret = 0;
6365                  do {
6366                          bits[ret].start = cache->start;
6367                          bits[ret].size = cache->size;
6368                          cache = next_cache_extent(cache);
6369                          ret++;
6370                  } while (cache && ret < bits_nr);
6371                  return ret;
6372         }
6373
6374         ret = 0;
6375         do {
6376                 bits[ret].start = cache->start;
6377                 bits[ret].size = cache->size;
6378                 cache = next_cache_extent(cache);
6379                 ret++;
6380         } while (cache && ret < bits_nr);
6381
6382         if (bits_nr - ret > 8) {
6383                 u64 lookup = bits[0].start + bits[0].size;
6384                 struct cache_extent *next;
6385                 next = search_cache_extent(pending, lookup);
6386                 while(next) {
6387                         if (next->start - lookup > 32768)
6388                                 break;
6389                         bits[ret].start = next->start;
6390                         bits[ret].size = next->size;
6391                         lookup = next->start + next->size;
6392                         ret++;
6393                         if (ret == bits_nr)
6394                                 break;
6395                         next = next_cache_extent(next);
6396                         if (!next)
6397                                 break;
6398                 }
6399         }
6400         return ret;
6401 }
6402
6403 static void free_chunk_record(struct cache_extent *cache)
6404 {
6405         struct chunk_record *rec;
6406
6407         rec = container_of(cache, struct chunk_record, cache);
6408         list_del_init(&rec->list);
6409         list_del_init(&rec->dextents);
6410         free(rec);
6411 }
6412
6413 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6414 {
6415         cache_tree_free_extents(chunk_cache, free_chunk_record);
6416 }
6417
6418 static void free_device_record(struct rb_node *node)
6419 {
6420         struct device_record *rec;
6421
6422         rec = container_of(node, struct device_record, node);
6423         free(rec);
6424 }
6425
6426 FREE_RB_BASED_TREE(device_cache, free_device_record);
6427
6428 int insert_block_group_record(struct block_group_tree *tree,
6429                               struct block_group_record *bg_rec)
6430 {
6431         int ret;
6432
6433         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6434         if (ret)
6435                 return ret;
6436
6437         list_add_tail(&bg_rec->list, &tree->block_groups);
6438         return 0;
6439 }
6440
6441 static void free_block_group_record(struct cache_extent *cache)
6442 {
6443         struct block_group_record *rec;
6444
6445         rec = container_of(cache, struct block_group_record, cache);
6446         list_del_init(&rec->list);
6447         free(rec);
6448 }
6449
6450 void free_block_group_tree(struct block_group_tree *tree)
6451 {
6452         cache_tree_free_extents(&tree->tree, free_block_group_record);
6453 }
6454
6455 int insert_device_extent_record(struct device_extent_tree *tree,
6456                                 struct device_extent_record *de_rec)
6457 {
6458         int ret;
6459
6460         /*
6461          * Device extent is a bit different from the other extents, because
6462          * the extents which belong to the different devices may have the
6463          * same start and size, so we need use the special extent cache
6464          * search/insert functions.
6465          */
6466         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6467         if (ret)
6468                 return ret;
6469
6470         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6471         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6472         return 0;
6473 }
6474
6475 static void free_device_extent_record(struct cache_extent *cache)
6476 {
6477         struct device_extent_record *rec;
6478
6479         rec = container_of(cache, struct device_extent_record, cache);
6480         if (!list_empty(&rec->chunk_list))
6481                 list_del_init(&rec->chunk_list);
6482         if (!list_empty(&rec->device_list))
6483                 list_del_init(&rec->device_list);
6484         free(rec);
6485 }
6486
6487 void free_device_extent_tree(struct device_extent_tree *tree)
6488 {
6489         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6490 }
6491
6492 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6493 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6494                                  struct extent_buffer *leaf, int slot)
6495 {
6496         struct btrfs_extent_ref_v0 *ref0;
6497         struct btrfs_key key;
6498         int ret;
6499
6500         btrfs_item_key_to_cpu(leaf, &key, slot);
6501         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6502         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6503                 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6504                                 0, 0);
6505         } else {
6506                 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6507                                 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
6508         }
6509         return ret;
6510 }
6511 #endif
6512
6513 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6514                                             struct btrfs_key *key,
6515                                             int slot)
6516 {
6517         struct btrfs_chunk *ptr;
6518         struct chunk_record *rec;
6519         int num_stripes, i;
6520
6521         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6522         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6523
6524         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6525         if (!rec) {
6526                 fprintf(stderr, "memory allocation failed\n");
6527                 exit(-1);
6528         }
6529
6530         INIT_LIST_HEAD(&rec->list);
6531         INIT_LIST_HEAD(&rec->dextents);
6532         rec->bg_rec = NULL;
6533
6534         rec->cache.start = key->offset;
6535         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6536
6537         rec->generation = btrfs_header_generation(leaf);
6538
6539         rec->objectid = key->objectid;
6540         rec->type = key->type;
6541         rec->offset = key->offset;
6542
6543         rec->length = rec->cache.size;
6544         rec->owner = btrfs_chunk_owner(leaf, ptr);
6545         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6546         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6547         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6548         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6549         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6550         rec->num_stripes = num_stripes;
6551         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6552
6553         for (i = 0; i < rec->num_stripes; ++i) {
6554                 rec->stripes[i].devid =
6555                         btrfs_stripe_devid_nr(leaf, ptr, i);
6556                 rec->stripes[i].offset =
6557                         btrfs_stripe_offset_nr(leaf, ptr, i);
6558                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6559                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6560                                 BTRFS_UUID_SIZE);
6561         }
6562
6563         return rec;
6564 }
6565
6566 static int process_chunk_item(struct cache_tree *chunk_cache,
6567                               struct btrfs_key *key, struct extent_buffer *eb,
6568                               int slot)
6569 {
6570         struct chunk_record *rec;
6571         struct btrfs_chunk *chunk;
6572         int ret = 0;
6573
6574         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6575         /*
6576          * Do extra check for this chunk item,
6577          *
6578          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6579          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6580          * and owner<->key_type check.
6581          */
6582         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6583                                       key->offset);
6584         if (ret < 0) {
6585                 error("chunk(%llu, %llu) is not valid, ignore it",
6586                       key->offset, btrfs_chunk_length(eb, chunk));
6587                 return 0;
6588         }
6589         rec = btrfs_new_chunk_record(eb, key, slot);
6590         ret = insert_cache_extent(chunk_cache, &rec->cache);
6591         if (ret) {
6592                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6593                         rec->offset, rec->length);
6594                 free(rec);
6595         }
6596
6597         return ret;
6598 }
6599
6600 static int process_device_item(struct rb_root *dev_cache,
6601                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6602 {
6603         struct btrfs_dev_item *ptr;
6604         struct device_record *rec;
6605         int ret = 0;
6606
6607         ptr = btrfs_item_ptr(eb,
6608                 slot, struct btrfs_dev_item);
6609
6610         rec = malloc(sizeof(*rec));
6611         if (!rec) {
6612                 fprintf(stderr, "memory allocation failed\n");
6613                 return -ENOMEM;
6614         }
6615
6616         rec->devid = key->offset;
6617         rec->generation = btrfs_header_generation(eb);
6618
6619         rec->objectid = key->objectid;
6620         rec->type = key->type;
6621         rec->offset = key->offset;
6622
6623         rec->devid = btrfs_device_id(eb, ptr);
6624         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6625         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6626
6627         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6628         if (ret) {
6629                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6630                 free(rec);
6631         }
6632
6633         return ret;
6634 }
6635
6636 struct block_group_record *
6637 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6638                              int slot)
6639 {
6640         struct btrfs_block_group_item *ptr;
6641         struct block_group_record *rec;
6642
6643         rec = calloc(1, sizeof(*rec));
6644         if (!rec) {
6645                 fprintf(stderr, "memory allocation failed\n");
6646                 exit(-1);
6647         }
6648
6649         rec->cache.start = key->objectid;
6650         rec->cache.size = key->offset;
6651
6652         rec->generation = btrfs_header_generation(leaf);
6653
6654         rec->objectid = key->objectid;
6655         rec->type = key->type;
6656         rec->offset = key->offset;
6657
6658         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6659         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6660
6661         INIT_LIST_HEAD(&rec->list);
6662
6663         return rec;
6664 }
6665
6666 static int process_block_group_item(struct block_group_tree *block_group_cache,
6667                                     struct btrfs_key *key,
6668                                     struct extent_buffer *eb, int slot)
6669 {
6670         struct block_group_record *rec;
6671         int ret = 0;
6672
6673         rec = btrfs_new_block_group_record(eb, key, slot);
6674         ret = insert_block_group_record(block_group_cache, rec);
6675         if (ret) {
6676                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6677                         rec->objectid, rec->offset);
6678                 free(rec);
6679         }
6680
6681         return ret;
6682 }
6683
6684 struct device_extent_record *
6685 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6686                                struct btrfs_key *key, int slot)
6687 {
6688         struct device_extent_record *rec;
6689         struct btrfs_dev_extent *ptr;
6690
6691         rec = calloc(1, sizeof(*rec));
6692         if (!rec) {
6693                 fprintf(stderr, "memory allocation failed\n");
6694                 exit(-1);
6695         }
6696
6697         rec->cache.objectid = key->objectid;
6698         rec->cache.start = key->offset;
6699
6700         rec->generation = btrfs_header_generation(leaf);
6701
6702         rec->objectid = key->objectid;
6703         rec->type = key->type;
6704         rec->offset = key->offset;
6705
6706         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6707         rec->chunk_objecteid =
6708                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6709         rec->chunk_offset =
6710                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6711         rec->length = btrfs_dev_extent_length(leaf, ptr);
6712         rec->cache.size = rec->length;
6713
6714         INIT_LIST_HEAD(&rec->chunk_list);
6715         INIT_LIST_HEAD(&rec->device_list);
6716
6717         return rec;
6718 }
6719
6720 static int
6721 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6722                            struct btrfs_key *key, struct extent_buffer *eb,
6723                            int slot)
6724 {
6725         struct device_extent_record *rec;
6726         int ret;
6727
6728         rec = btrfs_new_device_extent_record(eb, key, slot);
6729         ret = insert_device_extent_record(dev_extent_cache, rec);
6730         if (ret) {
6731                 fprintf(stderr,
6732                         "Device extent[%llu, %llu, %llu] existed.\n",
6733                         rec->objectid, rec->offset, rec->length);
6734                 free(rec);
6735         }
6736
6737         return ret;
6738 }
6739
6740 static int process_extent_item(struct btrfs_root *root,
6741                                struct cache_tree *extent_cache,
6742                                struct extent_buffer *eb, int slot)
6743 {
6744         struct btrfs_extent_item *ei;
6745         struct btrfs_extent_inline_ref *iref;
6746         struct btrfs_extent_data_ref *dref;
6747         struct btrfs_shared_data_ref *sref;
6748         struct btrfs_key key;
6749         struct extent_record tmpl;
6750         unsigned long end;
6751         unsigned long ptr;
6752         int ret;
6753         int type;
6754         u32 item_size = btrfs_item_size_nr(eb, slot);
6755         u64 refs = 0;
6756         u64 offset;
6757         u64 num_bytes;
6758         int metadata = 0;
6759
6760         btrfs_item_key_to_cpu(eb, &key, slot);
6761
6762         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6763                 metadata = 1;
6764                 num_bytes = root->nodesize;
6765         } else {
6766                 num_bytes = key.offset;
6767         }
6768
6769         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6770                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6771                       key.objectid, root->sectorsize);
6772                 return -EIO;
6773         }
6774         if (item_size < sizeof(*ei)) {
6775 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6776                 struct btrfs_extent_item_v0 *ei0;
6777                 BUG_ON(item_size != sizeof(*ei0));
6778                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6779                 refs = btrfs_extent_refs_v0(eb, ei0);
6780 #else
6781                 BUG();
6782 #endif
6783                 memset(&tmpl, 0, sizeof(tmpl));
6784                 tmpl.start = key.objectid;
6785                 tmpl.nr = num_bytes;
6786                 tmpl.extent_item_refs = refs;
6787                 tmpl.metadata = metadata;
6788                 tmpl.found_rec = 1;
6789                 tmpl.max_size = num_bytes;
6790
6791                 return add_extent_rec(extent_cache, &tmpl);
6792         }
6793
6794         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6795         refs = btrfs_extent_refs(eb, ei);
6796         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6797                 metadata = 1;
6798         else
6799                 metadata = 0;
6800         if (metadata && num_bytes != root->nodesize) {
6801                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6802                       num_bytes, root->nodesize);
6803                 return -EIO;
6804         }
6805         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6806                 error("ignore invalid data extent, length %llu is not aligned to %u",
6807                       num_bytes, root->sectorsize);
6808                 return -EIO;
6809         }
6810
6811         memset(&tmpl, 0, sizeof(tmpl));
6812         tmpl.start = key.objectid;
6813         tmpl.nr = num_bytes;
6814         tmpl.extent_item_refs = refs;
6815         tmpl.metadata = metadata;
6816         tmpl.found_rec = 1;
6817         tmpl.max_size = num_bytes;
6818         add_extent_rec(extent_cache, &tmpl);
6819
6820         ptr = (unsigned long)(ei + 1);
6821         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6822             key.type == BTRFS_EXTENT_ITEM_KEY)
6823                 ptr += sizeof(struct btrfs_tree_block_info);
6824
6825         end = (unsigned long)ei + item_size;
6826         while (ptr < end) {
6827                 iref = (struct btrfs_extent_inline_ref *)ptr;
6828                 type = btrfs_extent_inline_ref_type(eb, iref);
6829                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6830                 switch (type) {
6831                 case BTRFS_TREE_BLOCK_REF_KEY:
6832                         ret = add_tree_backref(extent_cache, key.objectid,
6833                                         0, offset, 0);
6834                         if (ret < 0)
6835                                 error("add_tree_backref failed: %s",
6836                                       strerror(-ret));
6837                         break;
6838                 case BTRFS_SHARED_BLOCK_REF_KEY:
6839                         ret = add_tree_backref(extent_cache, key.objectid,
6840                                         offset, 0, 0);
6841                         if (ret < 0)
6842                                 error("add_tree_backref failed: %s",
6843                                       strerror(-ret));
6844                         break;
6845                 case BTRFS_EXTENT_DATA_REF_KEY:
6846                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6847                         add_data_backref(extent_cache, key.objectid, 0,
6848                                         btrfs_extent_data_ref_root(eb, dref),
6849                                         btrfs_extent_data_ref_objectid(eb,
6850                                                                        dref),
6851                                         btrfs_extent_data_ref_offset(eb, dref),
6852                                         btrfs_extent_data_ref_count(eb, dref),
6853                                         0, num_bytes);
6854                         break;
6855                 case BTRFS_SHARED_DATA_REF_KEY:
6856                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6857                         add_data_backref(extent_cache, key.objectid, offset,
6858                                         0, 0, 0,
6859                                         btrfs_shared_data_ref_count(eb, sref),
6860                                         0, num_bytes);
6861                         break;
6862                 default:
6863                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6864                                 key.objectid, key.type, num_bytes);
6865                         goto out;
6866                 }
6867                 ptr += btrfs_extent_inline_ref_size(type);
6868         }
6869         WARN_ON(ptr > end);
6870 out:
6871         return 0;
6872 }
6873
/*
 * Check that the range [offset, offset + bytes) of block group @cache is
 * described by exactly one entry in cache->free_space_ctl.
 *
 * Before the lookup, the range is trimmed against every super block mirror:
 * for each mirror, btrfs_rmap_block() yields the logical addresses (and the
 * stripe length) that the mirror occupies inside this block group, and any
 * part of the range overlapped by such a stripe is cut away.  If a stripe
 * swallows the whole remaining range there is nothing to verify and 0 is
 * returned.  If a stripe lands strictly inside the range, the left part is
 * verified recursively and the loop goes on with the right part.
 *
 * On success the matching free space entry is unlinked from the ctl and
 * freed, so entries still present after all ranges were checked were never
 * matched by any range.
 *
 * Returns 0 on success, the error from btrfs_rmap_block(), or -EINVAL when
 * no entry exists or the entry's offset/size differs from the range.
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                /*
                 * NOTE(review): logical is presumably allocated by
                 * btrfs_rmap_block(); it is freed on every path below.
                 */
                ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                while (nr--) {
                        /* Stripe ends at or before the range: no overlap */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        /* Stripe starts at or after the range end: no overlap */
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* Stripe starts exactly at the range start */
                                if (stripe_len >= bytes) {
                                        free(logical);
                                        return 0;
                                }
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* Stripe starts before the range and reaches into it */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        free(logical);
                                        return 0;
                                }
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        free(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                free(logical);
        }

        /* What is left of the range must match one entry exactly */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /* Consume the entry so the caller can detect unmatched leftovers */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
6967
6968 static int verify_space_cache(struct btrfs_root *root,
6969                               struct btrfs_block_group_cache *cache)
6970 {
6971         struct btrfs_path path;
6972         struct extent_buffer *leaf;
6973         struct btrfs_key key;
6974         u64 last;
6975         int ret = 0;
6976
6977         root = root->fs_info->extent_root;
6978
6979         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6980
6981         btrfs_init_path(&path);
6982         key.objectid = last;
6983         key.offset = 0;
6984         key.type = BTRFS_EXTENT_ITEM_KEY;
6985         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6986         if (ret < 0)
6987                 goto out;
6988         ret = 0;
6989         while (1) {
6990                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6991                         ret = btrfs_next_leaf(root, &path);
6992                         if (ret < 0)
6993                                 goto out;
6994                         if (ret > 0) {
6995                                 ret = 0;
6996                                 break;
6997                         }
6998                 }
6999                 leaf = path.nodes[0];
7000                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7001                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7002                         break;
7003                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7004                     key.type != BTRFS_METADATA_ITEM_KEY) {
7005                         path.slots[0]++;
7006                         continue;
7007                 }
7008
7009                 if (last == key.objectid) {
7010                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7011                                 last = key.objectid + key.offset;
7012                         else
7013                                 last = key.objectid + root->nodesize;
7014                         path.slots[0]++;
7015                         continue;
7016                 }
7017
7018                 ret = check_cache_range(root, cache, last,
7019                                         key.objectid - last);
7020                 if (ret)
7021                         break;
7022                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7023                         last = key.objectid + key.offset;
7024                 else
7025                         last = key.objectid + root->nodesize;
7026                 path.slots[0]++;
7027         }
7028
7029         if (last < cache->key.objectid + cache->key.offset)
7030                 ret = check_cache_range(root, cache, last,
7031                                         cache->key.objectid +
7032                                         cache->key.offset - last);
7033
7034 out:
7035         btrfs_release_path(&path);
7036
7037         if (!ret &&
7038             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7039                 fprintf(stderr, "There are still entries left in the space "
7040                         "cache\n");
7041                 ret = -EINVAL;
7042         }
7043
7044         return ret;
7045 }
7046
7047 static int check_space_cache(struct btrfs_root *root)
7048 {
7049         struct btrfs_block_group_cache *cache;
7050         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7051         int ret;
7052         int error = 0;
7053
7054         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7055             btrfs_super_generation(root->fs_info->super_copy) !=
7056             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7057                 printf("cache and super generation don't match, space cache "
7058                        "will be invalidated\n");
7059                 return 0;
7060         }
7061
7062         if (ctx.progress_enabled) {
7063                 ctx.tp = TASK_FREE_SPACE;
7064                 task_start(ctx.info);
7065         }
7066
7067         while (1) {
7068                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7069                 if (!cache)
7070                         break;
7071
7072                 start = cache->key.objectid + cache->key.offset;
7073                 if (!cache->free_space_ctl) {
7074                         if (btrfs_init_free_space_ctl(cache,
7075                                                       root->sectorsize)) {
7076                                 ret = -ENOMEM;
7077                                 break;
7078                         }
7079                 } else {
7080                         btrfs_remove_free_space_cache(cache);
7081                 }
7082
7083                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7084                         ret = exclude_super_stripes(root, cache);
7085                         if (ret) {
7086                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7087                                         strerror(-ret));
7088                                 error++;
7089                                 continue;
7090                         }
7091                         ret = load_free_space_tree(root->fs_info, cache);
7092                         free_excluded_extents(root, cache);
7093                         if (ret < 0) {
7094                                 fprintf(stderr, "could not load free space tree: %s\n",
7095                                         strerror(-ret));
7096                                 error++;
7097                                 continue;
7098                         }
7099                         error += ret;
7100                 } else {
7101                         ret = load_free_space_cache(root->fs_info, cache);
7102                         if (!ret)
7103                                 continue;
7104                 }
7105
7106                 ret = verify_space_cache(root, cache);
7107                 if (ret) {
7108                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7109                                 cache->key.objectid);
7110                         error++;
7111                 }
7112         }
7113
7114         task_stop(ctx.info);
7115
7116         return error ? -EINVAL : 0;
7117 }
7118
7119 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7120                         u64 num_bytes, unsigned long leaf_offset,
7121                         struct extent_buffer *eb) {
7122
7123         u64 offset = 0;
7124         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7125         char *data;
7126         unsigned long csum_offset;
7127         u32 csum;
7128         u32 csum_expected;
7129         u64 read_len;
7130         u64 data_checked = 0;
7131         u64 tmp;
7132         int ret = 0;
7133         int mirror;
7134         int num_copies;
7135
7136         if (num_bytes % root->sectorsize)
7137                 return -EINVAL;
7138
7139         data = malloc(num_bytes);
7140         if (!data)
7141                 return -ENOMEM;
7142
7143         while (offset < num_bytes) {
7144                 mirror = 0;
7145 again:
7146                 read_len = num_bytes - offset;
7147                 /* read as much space once a time */
7148                 ret = read_extent_data(root, data + offset,
7149                                 bytenr + offset, &read_len, mirror);
7150                 if (ret)
7151                         goto out;
7152                 data_checked = 0;
7153                 /* verify every 4k data's checksum */
7154                 while (data_checked < read_len) {
7155                         csum = ~(u32)0;
7156                         tmp = offset + data_checked;
7157
7158                         csum = btrfs_csum_data((char *)data + tmp,
7159                                                csum, root->sectorsize);
7160                         btrfs_csum_final(csum, (u8 *)&csum);
7161
7162                         csum_offset = leaf_offset +
7163                                  tmp / root->sectorsize * csum_size;
7164                         read_extent_buffer(eb, (char *)&csum_expected,
7165                                            csum_offset, csum_size);
7166                         /* try another mirror */
7167                         if (csum != csum_expected) {
7168                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7169                                                 mirror, bytenr + tmp,
7170                                                 csum, csum_expected);
7171                                 num_copies = btrfs_num_copies(
7172                                                 &root->fs_info->mapping_tree,
7173                                                 bytenr, num_bytes);
7174                                 if (mirror < num_copies - 1) {
7175                                         mirror += 1;
7176                                         goto again;
7177                                 }
7178                         }
7179                         data_checked += root->sectorsize;
7180                 }
7181                 offset += read_len;
7182         }
7183 out:
7184         free(data);
7185         return ret;
7186 }
7187
/*
 * Check that the byte range [bytenr, bytenr + num_bytes) is fully covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts at the last item at or before
 * (bytenr, EXTENT_ITEM, (u64)-1) and walks forward.  While walking, the part
 * of the range covered by each extent is cut away by adjusting @bytenr and
 * @num_bytes; the range counts as covered once @num_bytes reaches 0.
 *
 * Returns 0 if the range is covered, 1 if some part has no extent record
 * (a message is printed), or a negative error from the tree search.
 */
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes)
{
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        int ret;

        btrfs_init_path(&path);
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

again:
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
                                0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error looking up extent record %d\n", ret);
                btrfs_release_path(&path);
                return ret;
        } else if (ret) {
                /* No exact match: step back to the preceding item */
                if (path.slots[0] > 0) {
                        path.slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, &path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                /* No previous leaf; "out" reports the range */
                                ret = 0;
                                goto out;
                        }
                }
        }

        btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);

        /*
         * Block group items come before extent items if they have the same
         * bytenr, so walk back one more just in case.  Dear future traveller,
         * first congrats on mastering time travel.  Now if it's not too much
         * trouble could you go back to 2006 and tell Chris to make the
         * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
         * EXTENT_ITEM_KEY please?
         */
        while (key.type > BTRFS_EXTENT_ITEM_KEY) {
                if (path.slots[0] > 0) {
                        path.slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, &path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                ret = 0;
                                goto out;
                        }
                }
                btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
        }

        /* Walk forward, shrinking the range by what each extent covers */
        while (num_bytes) {
                if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                        ret = btrfs_next_leaf(root, &path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf "
                                        "%d\n", ret);
                                btrfs_release_path(&path);
                                return ret;
                        } else if (ret) {
                                break;
                        }
                }
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                if (key.type != BTRFS_EXTENT_ITEM_KEY) {
                        path.slots[0]++;
                        continue;
                }
                /* Extent ends before the range starts */
                if (key.objectid + key.offset < bytenr) {
                        path.slots[0]++;
                        continue;
                }
                /* Extent starts after the range ends: nothing more to find */
                if (key.objectid > bytenr + num_bytes)
                        break;

                if (key.objectid == bytenr) {
                        /* Extent starts exactly at the range start */
                        if (key.offset >= num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes -= key.offset;
                        bytenr += key.offset;
                } else if (key.objectid < bytenr) {
                        /* Extent starts before the range start */
                        if (key.objectid + key.offset >= bytenr + num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes = (bytenr + num_bytes) -
                                (key.objectid + key.offset);
                        bytenr = key.objectid + key.offset;
                } else {
                        /* Extent starts inside the range */
                        if (key.objectid + key.offset < bytenr + num_bytes) {
                                u64 new_start = key.objectid + key.offset;
                                u64 new_bytes = bytenr + num_bytes - new_start;

                                /*
                                 * Weird case, the extent is in the middle of
                                 * our range, we'll have to search one side
                                 * and then the other.  Not sure if this happens
                                 * in real life, but no harm in coding it up
                                 * anyway just in case.
                                 */
                                btrfs_release_path(&path);
                                ret = check_extent_exists(root, new_start,
                                                          new_bytes);
                                if (ret) {
                                        fprintf(stderr, "Right section didn't "
                                                "have a record\n");
                                        break;
                                }
                                /*
                                 * Keep only the left part.  Note that the
                                 * new search uses the key of the extent just
                                 * found, which is still in "key".
                                 */
                                num_bytes = key.objectid - bytenr;
                                goto again;
                        }
                        /* Extent covers the tail: keep only the left part */
                        num_bytes = key.objectid - bytenr;
                }
                path.slots[0]++;
        }
        /*
         * Whatever ended the loop, the verdict below depends only on whether
         * some part of the range is still uncovered.
         */
        ret = 0;

out:
        if (num_bytes && !ret) {
                fprintf(stderr, "There are no extents for csum range "
                        "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
                ret = 1;
        }

        btrfs_release_path(&path);
        return ret;
}
7326
7327 static int check_csums(struct btrfs_root *root)
7328 {
7329         struct btrfs_path path;
7330         struct extent_buffer *leaf;
7331         struct btrfs_key key;
7332         u64 offset = 0, num_bytes = 0;
7333         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7334         int errors = 0;
7335         int ret;
7336         u64 data_len;
7337         unsigned long leaf_offset;
7338
7339         root = root->fs_info->csum_root;
7340         if (!extent_buffer_uptodate(root->node)) {
7341                 fprintf(stderr, "No valid csum tree found\n");
7342                 return -ENOENT;
7343         }
7344
7345         btrfs_init_path(&path);
7346         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7347         key.type = BTRFS_EXTENT_CSUM_KEY;
7348         key.offset = 0;
7349         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7350         if (ret < 0) {
7351                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7352                 btrfs_release_path(&path);
7353                 return ret;
7354         }
7355
7356         if (ret > 0 && path.slots[0])
7357                 path.slots[0]--;
7358         ret = 0;
7359
7360         while (1) {
7361                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7362                         ret = btrfs_next_leaf(root, &path);
7363                         if (ret < 0) {
7364                                 fprintf(stderr, "Error going to next leaf "
7365                                         "%d\n", ret);
7366                                 break;
7367                         }
7368                         if (ret)
7369                                 break;
7370                 }
7371                 leaf = path.nodes[0];
7372
7373                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7374                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7375                         path.slots[0]++;
7376                         continue;
7377                 }
7378
7379                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7380                               csum_size) * root->sectorsize;
7381                 if (!check_data_csum)
7382                         goto skip_csum_check;
7383                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7384                 ret = check_extent_csums(root, key.offset, data_len,
7385                                          leaf_offset, leaf);
7386                 if (ret)
7387                         break;
7388 skip_csum_check:
7389                 if (!num_bytes) {
7390                         offset = key.offset;
7391                 } else if (key.offset != offset + num_bytes) {
7392                         ret = check_extent_exists(root, offset, num_bytes);
7393                         if (ret) {
7394                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7395                                         "there is no extent record\n",
7396                                         offset, offset+num_bytes);
7397                                 errors++;
7398                         }
7399                         offset = key.offset;
7400                         num_bytes = 0;
7401                 }
7402                 num_bytes += data_len;
7403                 path.slots[0]++;
7404         }
7405
7406         btrfs_release_path(&path);
7407         return errors;
7408 }
7409
7410 static int is_dropped_key(struct btrfs_key *key,
7411                           struct btrfs_key *drop_key) {
7412         if (key->objectid < drop_key->objectid)
7413                 return 1;
7414         else if (key->objectid == drop_key->objectid) {
7415                 if (key->type < drop_key->type)
7416                         return 1;
7417                 else if (key->type == drop_key->type) {
7418                         if (key->offset < drop_key->offset)
7419                                 return 1;
7420                 }
7421         }
7422         return 0;
7423 }
7424
7425 /*
7426  * Here are the rules for FULL_BACKREF.
7427  *
7428  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7429  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7430  *      FULL_BACKREF set.
7431  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7432  *    if it happened after the relocation occurred since we'll have dropped the
7433  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7434  *    have no real way to know for sure.
7435  *
7436  * We process the blocks one root at a time, and we start from the lowest root
7437  * objectid and go to the highest.  So we can just lookup the owner backref for
7438  * the record and if we don't find it then we know it doesn't exist and we have
7439  * a FULL BACKREF.
7440  *
7441  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7442  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7443  * be set or not and then we can check later once we've gathered all the refs.
7444  */
7445 static int calc_extent_flag(struct cache_tree *extent_cache,
7446                            struct extent_buffer *buf,
7447                            struct root_item_record *ri,
7448                            u64 *flags)
7449 {
7450         struct extent_record *rec;
7451         struct cache_extent *cache;
7452         struct tree_backref *tback;
7453         u64 owner = 0;
7454
7455         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7456         /* we have added this extent before */
7457         if (!cache)
7458                 return -ENOENT;
7459
7460         rec = container_of(cache, struct extent_record, cache);
7461
7462         /*
7463          * Except file/reloc tree, we can not have
7464          * FULL BACKREF MODE
7465          */
7466         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7467                 goto normal;
7468         /*
7469          * root node
7470          */
7471         if (buf->start == ri->bytenr)
7472                 goto normal;
7473
7474         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7475                 goto full_backref;
7476
7477         owner = btrfs_header_owner(buf);
7478         if (owner == ri->objectid)
7479                 goto normal;
7480
7481         tback = find_tree_backref(rec, 0, owner);
7482         if (!tback)
7483                 goto full_backref;
7484 normal:
7485         *flags = 0;
7486         if (rec->flag_block_full_backref != FLAG_UNSET &&
7487             rec->flag_block_full_backref != 0)
7488                 rec->bad_full_backref = 1;
7489         return 0;
7490 full_backref:
7491         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7492         if (rec->flag_block_full_backref != FLAG_UNSET &&
7493             rec->flag_block_full_backref != 1)
7494                 rec->bad_full_backref = 1;
7495         return 0;
7496 }
7497
7498 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7499 {
7500         fprintf(stderr, "Invalid key type(");
7501         print_key_type(stderr, 0, key_type);
7502         fprintf(stderr, ") found in root(");
7503         print_objectid(stderr, rootid, 0);
7504         fprintf(stderr, ")\n");
7505 }
7506
7507 /*
7508  * Check if the key is valid with its extent buffer.
7509  *
7510  * This is a early check in case invalid key exists in a extent buffer
7511  * This is not comprehensive yet, but should prevent wrong key/item passed
7512  * further
7513  */
7514 static int check_type_with_root(u64 rootid, u8 key_type)
7515 {
7516         switch (key_type) {
7517         /* Only valid in chunk tree */
7518         case BTRFS_DEV_ITEM_KEY:
7519         case BTRFS_CHUNK_ITEM_KEY:
7520                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7521                         goto err;
7522                 break;
7523         /* valid in csum and log tree */
7524         case BTRFS_CSUM_TREE_OBJECTID:
7525                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7526                       is_fstree(rootid)))
7527                         goto err;
7528                 break;
7529         case BTRFS_EXTENT_ITEM_KEY:
7530         case BTRFS_METADATA_ITEM_KEY:
7531         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7532                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7533                         goto err;
7534                 break;
7535         case BTRFS_ROOT_ITEM_KEY:
7536                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7537                         goto err;
7538                 break;
7539         case BTRFS_DEV_EXTENT_KEY:
7540                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7541                         goto err;
7542                 break;
7543         }
7544         return 0;
7545 err:
7546         report_mismatch_key_root(key_type, rootid);
7547         return -EINVAL;
7548 }
7549
7550 static int run_next_block(struct btrfs_root *root,
7551                           struct block_info *bits,
7552                           int bits_nr,
7553                           u64 *last,
7554                           struct cache_tree *pending,
7555                           struct cache_tree *seen,
7556                           struct cache_tree *reada,
7557                           struct cache_tree *nodes,
7558                           struct cache_tree *extent_cache,
7559                           struct cache_tree *chunk_cache,
7560                           struct rb_root *dev_cache,
7561                           struct block_group_tree *block_group_cache,
7562                           struct device_extent_tree *dev_extent_cache,
7563                           struct root_item_record *ri)
7564 {
7565         struct extent_buffer *buf;
7566         struct extent_record *rec = NULL;
7567         u64 bytenr;
7568         u32 size;
7569         u64 parent;
7570         u64 owner;
7571         u64 flags;
7572         u64 ptr;
7573         u64 gen = 0;
7574         int ret = 0;
7575         int i;
7576         int nritems;
7577         struct btrfs_key key;
7578         struct cache_extent *cache;
7579         int reada_bits;
7580
7581         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7582                                     bits_nr, &reada_bits);
7583         if (nritems == 0)
7584                 return 1;
7585
7586         if (!reada_bits) {
7587                 for(i = 0; i < nritems; i++) {
7588                         ret = add_cache_extent(reada, bits[i].start,
7589                                                bits[i].size);
7590                         if (ret == -EEXIST)
7591                                 continue;
7592
7593                         /* fixme, get the parent transid */
7594                         readahead_tree_block(root, bits[i].start,
7595                                              bits[i].size, 0);
7596                 }
7597         }
7598         *last = bits[0].start;
7599         bytenr = bits[0].start;
7600         size = bits[0].size;
7601
7602         cache = lookup_cache_extent(pending, bytenr, size);
7603         if (cache) {
7604                 remove_cache_extent(pending, cache);
7605                 free(cache);
7606         }
7607         cache = lookup_cache_extent(reada, bytenr, size);
7608         if (cache) {
7609                 remove_cache_extent(reada, cache);
7610                 free(cache);
7611         }
7612         cache = lookup_cache_extent(nodes, bytenr, size);
7613         if (cache) {
7614                 remove_cache_extent(nodes, cache);
7615                 free(cache);
7616         }
7617         cache = lookup_cache_extent(extent_cache, bytenr, size);
7618         if (cache) {
7619                 rec = container_of(cache, struct extent_record, cache);
7620                 gen = rec->parent_generation;
7621         }
7622
7623         /* fixme, get the real parent transid */
7624         buf = read_tree_block(root, bytenr, size, gen);
7625         if (!extent_buffer_uptodate(buf)) {
7626                 record_bad_block_io(root->fs_info,
7627                                     extent_cache, bytenr, size);
7628                 goto out;
7629         }
7630
7631         nritems = btrfs_header_nritems(buf);
7632
7633         flags = 0;
7634         if (!init_extent_tree) {
7635                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7636                                        btrfs_header_level(buf), 1, NULL,
7637                                        &flags);
7638                 if (ret < 0) {
7639                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7640                         if (ret < 0) {
7641                                 fprintf(stderr, "Couldn't calc extent flags\n");
7642                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7643                         }
7644                 }
7645         } else {
7646                 flags = 0;
7647                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7648                 if (ret < 0) {
7649                         fprintf(stderr, "Couldn't calc extent flags\n");
7650                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7651                 }
7652         }
7653
7654         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7655                 if (ri != NULL &&
7656                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7657                     ri->objectid == btrfs_header_owner(buf)) {
7658                         /*
7659                          * Ok we got to this block from it's original owner and
7660                          * we have FULL_BACKREF set.  Relocation can leave
7661                          * converted blocks over so this is altogether possible,
7662                          * however it's not possible if the generation > the
7663                          * last snapshot, so check for this case.
7664                          */
7665                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7666                             btrfs_header_generation(buf) > ri->last_snapshot) {
7667                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7668                                 rec->bad_full_backref = 1;
7669                         }
7670                 }
7671         } else {
7672                 if (ri != NULL &&
7673                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7674                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7675                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7676                         rec->bad_full_backref = 1;
7677                 }
7678         }
7679
7680         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7681                 rec->flag_block_full_backref = 1;
7682                 parent = bytenr;
7683                 owner = 0;
7684         } else {
7685                 rec->flag_block_full_backref = 0;
7686                 parent = 0;
7687                 owner = btrfs_header_owner(buf);
7688         }
7689
7690         ret = check_block(root, extent_cache, buf, flags);
7691         if (ret)
7692                 goto out;
7693
7694         if (btrfs_is_leaf(buf)) {
7695                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7696                 for (i = 0; i < nritems; i++) {
7697                         struct btrfs_file_extent_item *fi;
7698                         btrfs_item_key_to_cpu(buf, &key, i);
7699                         /*
7700                          * Check key type against the leaf owner.
7701                          * Could filter quite a lot of early error if
7702                          * owner is correct
7703                          */
7704                         if (check_type_with_root(btrfs_header_owner(buf),
7705                                                  key.type)) {
7706                                 fprintf(stderr, "ignoring invalid key\n");
7707                                 continue;
7708                         }
7709                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7710                                 process_extent_item(root, extent_cache, buf,
7711                                                     i);
7712                                 continue;
7713                         }
7714                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7715                                 process_extent_item(root, extent_cache, buf,
7716                                                     i);
7717                                 continue;
7718                         }
7719                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7720                                 total_csum_bytes +=
7721                                         btrfs_item_size_nr(buf, i);
7722                                 continue;
7723                         }
7724                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7725                                 process_chunk_item(chunk_cache, &key, buf, i);
7726                                 continue;
7727                         }
7728                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7729                                 process_device_item(dev_cache, &key, buf, i);
7730                                 continue;
7731                         }
7732                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7733                                 process_block_group_item(block_group_cache,
7734                                         &key, buf, i);
7735                                 continue;
7736                         }
7737                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7738                                 process_device_extent_item(dev_extent_cache,
7739                                         &key, buf, i);
7740                                 continue;
7741
7742                         }
7743                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7744 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7745                                 process_extent_ref_v0(extent_cache, buf, i);
7746 #else
7747                                 BUG();
7748 #endif
7749                                 continue;
7750                         }
7751
7752                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7753                                 ret = add_tree_backref(extent_cache,
7754                                                 key.objectid, 0, key.offset, 0);
7755                                 if (ret < 0)
7756                                         error("add_tree_backref failed: %s",
7757                                               strerror(-ret));
7758                                 continue;
7759                         }
7760                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7761                                 ret = add_tree_backref(extent_cache,
7762                                                 key.objectid, key.offset, 0, 0);
7763                                 if (ret < 0)
7764                                         error("add_tree_backref failed: %s",
7765                                               strerror(-ret));
7766                                 continue;
7767                         }
7768                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7769                                 struct btrfs_extent_data_ref *ref;
7770                                 ref = btrfs_item_ptr(buf, i,
7771                                                 struct btrfs_extent_data_ref);
7772                                 add_data_backref(extent_cache,
7773                                         key.objectid, 0,
7774                                         btrfs_extent_data_ref_root(buf, ref),
7775                                         btrfs_extent_data_ref_objectid(buf,
7776                                                                        ref),
7777                                         btrfs_extent_data_ref_offset(buf, ref),
7778                                         btrfs_extent_data_ref_count(buf, ref),
7779                                         0, root->sectorsize);
7780                                 continue;
7781                         }
7782                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7783                                 struct btrfs_shared_data_ref *ref;
7784                                 ref = btrfs_item_ptr(buf, i,
7785                                                 struct btrfs_shared_data_ref);
7786                                 add_data_backref(extent_cache,
7787                                         key.objectid, key.offset, 0, 0, 0,
7788                                         btrfs_shared_data_ref_count(buf, ref),
7789                                         0, root->sectorsize);
7790                                 continue;
7791                         }
7792                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7793                                 struct bad_item *bad;
7794
7795                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7796                                         continue;
7797                                 if (!owner)
7798                                         continue;
7799                                 bad = malloc(sizeof(struct bad_item));
7800                                 if (!bad)
7801                                         continue;
7802                                 INIT_LIST_HEAD(&bad->list);
7803                                 memcpy(&bad->key, &key,
7804                                        sizeof(struct btrfs_key));
7805                                 bad->root_id = owner;
7806                                 list_add_tail(&bad->list, &delete_items);
7807                                 continue;
7808                         }
7809                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7810                                 continue;
7811                         fi = btrfs_item_ptr(buf, i,
7812                                             struct btrfs_file_extent_item);
7813                         if (btrfs_file_extent_type(buf, fi) ==
7814                             BTRFS_FILE_EXTENT_INLINE)
7815                                 continue;
7816                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7817                                 continue;
7818
7819                         data_bytes_allocated +=
7820                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7821                         if (data_bytes_allocated < root->sectorsize) {
7822                                 abort();
7823                         }
7824                         data_bytes_referenced +=
7825                                 btrfs_file_extent_num_bytes(buf, fi);
7826                         add_data_backref(extent_cache,
7827                                 btrfs_file_extent_disk_bytenr(buf, fi),
7828                                 parent, owner, key.objectid, key.offset -
7829                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7830                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7831                 }
7832         } else {
7833                 int level;
7834                 struct btrfs_key first_key;
7835
7836                 first_key.objectid = 0;
7837
7838                 if (nritems > 0)
7839                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7840                 level = btrfs_header_level(buf);
7841                 for (i = 0; i < nritems; i++) {
7842                         struct extent_record tmpl;
7843
7844                         ptr = btrfs_node_blockptr(buf, i);
7845                         size = root->nodesize;
7846                         btrfs_node_key_to_cpu(buf, &key, i);
7847                         if (ri != NULL) {
7848                                 if ((level == ri->drop_level)
7849                                     && is_dropped_key(&key, &ri->drop_key)) {
7850                                         continue;
7851                                 }
7852                         }
7853
7854                         memset(&tmpl, 0, sizeof(tmpl));
7855                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7856                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7857                         tmpl.start = ptr;
7858                         tmpl.nr = size;
7859                         tmpl.refs = 1;
7860                         tmpl.metadata = 1;
7861                         tmpl.max_size = size;
7862                         ret = add_extent_rec(extent_cache, &tmpl);
7863                         if (ret < 0)
7864                                 goto out;
7865
7866                         ret = add_tree_backref(extent_cache, ptr, parent,
7867                                         owner, 1);
7868                         if (ret < 0) {
7869                                 error("add_tree_backref failed: %s",
7870                                       strerror(-ret));
7871                                 continue;
7872                         }
7873
7874                         if (level > 1) {
7875                                 add_pending(nodes, seen, ptr, size);
7876                         } else {
7877                                 add_pending(pending, seen, ptr, size);
7878                         }
7879                 }
7880                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7881                                       nritems) * sizeof(struct btrfs_key_ptr);
7882         }
7883         total_btree_bytes += buf->len;
7884         if (fs_root_objectid(btrfs_header_owner(buf)))
7885                 total_fs_tree_bytes += buf->len;
7886         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7887                 total_extent_tree_bytes += buf->len;
7888         if (!found_old_backref &&
7889             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7890             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7891             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7892                 found_old_backref = 1;
7893 out:
7894         free_extent_buffer(buf);
7895         return ret;
7896 }
7897
7898 static int add_root_to_pending(struct extent_buffer *buf,
7899                                struct cache_tree *extent_cache,
7900                                struct cache_tree *pending,
7901                                struct cache_tree *seen,
7902                                struct cache_tree *nodes,
7903                                u64 objectid)
7904 {
7905         struct extent_record tmpl;
7906         int ret;
7907
7908         if (btrfs_header_level(buf) > 0)
7909                 add_pending(nodes, seen, buf->start, buf->len);
7910         else
7911                 add_pending(pending, seen, buf->start, buf->len);
7912
7913         memset(&tmpl, 0, sizeof(tmpl));
7914         tmpl.start = buf->start;
7915         tmpl.nr = buf->len;
7916         tmpl.is_root = 1;
7917         tmpl.refs = 1;
7918         tmpl.metadata = 1;
7919         tmpl.max_size = buf->len;
7920         add_extent_rec(extent_cache, &tmpl);
7921
7922         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7923             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7924                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7925                                 0, 1);
7926         else
7927                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7928                                 1);
7929         return ret;
7930 }
7931
7932 /* as we fix the tree, we might be deleting blocks that
7933  * we're tracking for repair.  This hook makes sure we
7934  * remove any backrefs for blocks as we are fixing them.
7935  */
7936 static int free_extent_hook(struct btrfs_trans_handle *trans,
7937                             struct btrfs_root *root,
7938                             u64 bytenr, u64 num_bytes, u64 parent,
7939                             u64 root_objectid, u64 owner, u64 offset,
7940                             int refs_to_drop)
7941 {
7942         struct extent_record *rec;
7943         struct cache_extent *cache;
7944         int is_data;
7945         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7946
7947         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7948         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7949         if (!cache)
7950                 return 0;
7951
7952         rec = container_of(cache, struct extent_record, cache);
7953         if (is_data) {
7954                 struct data_backref *back;
7955                 back = find_data_backref(rec, parent, root_objectid, owner,
7956                                          offset, 1, bytenr, num_bytes);
7957                 if (!back)
7958                         goto out;
7959                 if (back->node.found_ref) {
7960                         back->found_ref -= refs_to_drop;
7961                         if (rec->refs)
7962                                 rec->refs -= refs_to_drop;
7963                 }
7964                 if (back->node.found_extent_tree) {
7965                         back->num_refs -= refs_to_drop;
7966                         if (rec->extent_item_refs)
7967                                 rec->extent_item_refs -= refs_to_drop;
7968                 }
7969                 if (back->found_ref == 0)
7970                         back->node.found_ref = 0;
7971                 if (back->num_refs == 0)
7972                         back->node.found_extent_tree = 0;
7973
7974                 if (!back->node.found_extent_tree && back->node.found_ref) {
7975                         list_del(&back->node.list);
7976                         free(back);
7977                 }
7978         } else {
7979                 struct tree_backref *back;
7980                 back = find_tree_backref(rec, parent, root_objectid);
7981                 if (!back)
7982                         goto out;
7983                 if (back->node.found_ref) {
7984                         if (rec->refs)
7985                                 rec->refs--;
7986                         back->node.found_ref = 0;
7987                 }
7988                 if (back->node.found_extent_tree) {
7989                         if (rec->extent_item_refs)
7990                                 rec->extent_item_refs--;
7991                         back->node.found_extent_tree = 0;
7992                 }
7993                 if (!back->node.found_extent_tree && back->node.found_ref) {
7994                         list_del(&back->node.list);
7995                         free(back);
7996                 }
7997         }
7998         maybe_free_extent_rec(extent_cache, rec);
7999 out:
8000         return 0;
8001 }
8002
8003 static int delete_extent_records(struct btrfs_trans_handle *trans,
8004                                  struct btrfs_root *root,
8005                                  struct btrfs_path *path,
8006                                  u64 bytenr)
8007 {
8008         struct btrfs_key key;
8009         struct btrfs_key found_key;
8010         struct extent_buffer *leaf;
8011         int ret;
8012         int slot;
8013
8014
8015         key.objectid = bytenr;
8016         key.type = (u8)-1;
8017         key.offset = (u64)-1;
8018
8019         while(1) {
8020                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8021                                         &key, path, 0, 1);
8022                 if (ret < 0)
8023                         break;
8024
8025                 if (ret > 0) {
8026                         ret = 0;
8027                         if (path->slots[0] == 0)
8028                                 break;
8029                         path->slots[0]--;
8030                 }
8031                 ret = 0;
8032
8033                 leaf = path->nodes[0];
8034                 slot = path->slots[0];
8035
8036                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8037                 if (found_key.objectid != bytenr)
8038                         break;
8039
8040                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8041                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8042                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8043                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8044                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8045                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8046                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8047                         btrfs_release_path(path);
8048                         if (found_key.type == 0) {
8049                                 if (found_key.offset == 0)
8050                                         break;
8051                                 key.offset = found_key.offset - 1;
8052                                 key.type = found_key.type;
8053                         }
8054                         key.type = found_key.type - 1;
8055                         key.offset = (u64)-1;
8056                         continue;
8057                 }
8058
8059                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8060                         found_key.objectid, found_key.type, found_key.offset);
8061
8062                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8063                 if (ret)
8064                         break;
8065                 btrfs_release_path(path);
8066
8067                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8068                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8069                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8070                                 found_key.offset : root->nodesize;
8071
8072                         ret = btrfs_update_block_group(trans, root, bytenr,
8073                                                        bytes, 0, 0);
8074                         if (ret)
8075                                 break;
8076                 }
8077         }
8078
8079         btrfs_release_path(path);
8080         return ret;
8081 }
8082
8083 /*
8084  * for a single backref, this will allocate a new extent
8085  * and add the backref to it.
8086  */
8087 static int record_extent(struct btrfs_trans_handle *trans,
8088                          struct btrfs_fs_info *info,
8089                          struct btrfs_path *path,
8090                          struct extent_record *rec,
8091                          struct extent_backref *back,
8092                          int allocated, u64 flags)
8093 {
8094         int ret = 0;
8095         struct btrfs_root *extent_root = info->extent_root;
8096         struct extent_buffer *leaf;
8097         struct btrfs_key ins_key;
8098         struct btrfs_extent_item *ei;
8099         struct data_backref *dback;
8100         struct btrfs_tree_block_info *bi;
8101
8102         if (!back->is_data)
8103                 rec->max_size = max_t(u64, rec->max_size,
8104                                     info->extent_root->nodesize);
8105
8106         if (!allocated) {
8107                 u32 item_size = sizeof(*ei);
8108
8109                 if (!back->is_data)
8110                         item_size += sizeof(*bi);
8111
8112                 ins_key.objectid = rec->start;
8113                 ins_key.offset = rec->max_size;
8114                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8115
8116                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8117                                         &ins_key, item_size);
8118                 if (ret)
8119                         goto fail;
8120
8121                 leaf = path->nodes[0];
8122                 ei = btrfs_item_ptr(leaf, path->slots[0],
8123                                     struct btrfs_extent_item);
8124
8125                 btrfs_set_extent_refs(leaf, ei, 0);
8126                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8127
8128                 if (back->is_data) {
8129                         btrfs_set_extent_flags(leaf, ei,
8130                                                BTRFS_EXTENT_FLAG_DATA);
8131                 } else {
8132                         struct btrfs_disk_key copy_key;;
8133
8134                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8135                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8136                                              sizeof(*bi));
8137
8138                         btrfs_set_disk_key_objectid(&copy_key,
8139                                                     rec->info_objectid);
8140                         btrfs_set_disk_key_type(&copy_key, 0);
8141                         btrfs_set_disk_key_offset(&copy_key, 0);
8142
8143                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8144                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8145
8146                         btrfs_set_extent_flags(leaf, ei,
8147                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8148                 }
8149
8150                 btrfs_mark_buffer_dirty(leaf);
8151                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8152                                                rec->max_size, 1, 0);
8153                 if (ret)
8154                         goto fail;
8155                 btrfs_release_path(path);
8156         }
8157
8158         if (back->is_data) {
8159                 u64 parent;
8160                 int i;
8161
8162                 dback = to_data_backref(back);
8163                 if (back->full_backref)
8164                         parent = dback->parent;
8165                 else
8166                         parent = 0;
8167
8168                 for (i = 0; i < dback->found_ref; i++) {
8169                         /* if parent != 0, we're doing a full backref
8170                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8171                          * just makes the backref allocator create a data
8172                          * backref
8173                          */
8174                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8175                                                    rec->start, rec->max_size,
8176                                                    parent,
8177                                                    dback->root,
8178                                                    parent ?
8179                                                    BTRFS_FIRST_FREE_OBJECTID :
8180                                                    dback->owner,
8181                                                    dback->offset);
8182                         if (ret)
8183                                 break;
8184                 }
8185                 fprintf(stderr, "adding new data backref"
8186                                 " on %llu %s %llu owner %llu"
8187                                 " offset %llu found %d\n",
8188                                 (unsigned long long)rec->start,
8189                                 back->full_backref ?
8190                                 "parent" : "root",
8191                                 back->full_backref ?
8192                                 (unsigned long long)parent :
8193                                 (unsigned long long)dback->root,
8194                                 (unsigned long long)dback->owner,
8195                                 (unsigned long long)dback->offset,
8196                                 dback->found_ref);
8197         } else {
8198                 u64 parent;
8199                 struct tree_backref *tback;
8200
8201                 tback = to_tree_backref(back);
8202                 if (back->full_backref)
8203                         parent = tback->parent;
8204                 else
8205                         parent = 0;
8206
8207                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8208                                            rec->start, rec->max_size,
8209                                            parent, tback->root, 0, 0);
8210                 fprintf(stderr, "adding new tree backref on "
8211                         "start %llu len %llu parent %llu root %llu\n",
8212                         rec->start, rec->max_size, parent, tback->root);
8213         }
8214 fail:
8215         btrfs_release_path(path);
8216         return ret;
8217 }
8218
8219 static struct extent_entry *find_entry(struct list_head *entries,
8220                                        u64 bytenr, u64 bytes)
8221 {
8222         struct extent_entry *entry = NULL;
8223
8224         list_for_each_entry(entry, entries, list) {
8225                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8226                         return entry;
8227         }
8228
8229         return NULL;
8230 }
8231
8232 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8233 {
8234         struct extent_entry *entry, *best = NULL, *prev = NULL;
8235
8236         list_for_each_entry(entry, entries, list) {
8237                 /*
8238                  * If there are as many broken entries as entries then we know
8239                  * not to trust this particular entry.
8240                  */
8241                 if (entry->broken == entry->count)
8242                         continue;
8243
8244                 /*
8245                  * Special case, when there are only two entries and 'best' is
8246                  * the first one
8247                  */
8248                 if (!prev) {
8249                         best = entry;
8250                         prev = entry;
8251                         continue;
8252                 }
8253
8254                 /*
8255                  * If our current entry == best then we can't be sure our best
8256                  * is really the best, so we need to keep searching.
8257                  */
8258                 if (best && best->count == entry->count) {
8259                         prev = entry;
8260                         best = NULL;
8261                         continue;
8262                 }
8263
8264                 /* Prev == entry, not good enough, have to keep searching */
8265                 if (!prev->broken && prev->count == entry->count)
8266                         continue;
8267
8268                 if (!best)
8269                         best = (prev->count > entry->count) ? prev : entry;
8270                 else if (best->count < entry->count)
8271                         best = entry;
8272                 prev = entry;
8273         }
8274
8275         return best;
8276 }
8277
8278 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8279                       struct data_backref *dback, struct extent_entry *entry)
8280 {
8281         struct btrfs_trans_handle *trans;
8282         struct btrfs_root *root;
8283         struct btrfs_file_extent_item *fi;
8284         struct extent_buffer *leaf;
8285         struct btrfs_key key;
8286         u64 bytenr, bytes;
8287         int ret, err;
8288
8289         key.objectid = dback->root;
8290         key.type = BTRFS_ROOT_ITEM_KEY;
8291         key.offset = (u64)-1;
8292         root = btrfs_read_fs_root(info, &key);
8293         if (IS_ERR(root)) {
8294                 fprintf(stderr, "Couldn't find root for our ref\n");
8295                 return -EINVAL;
8296         }
8297
8298         /*
8299          * The backref points to the original offset of the extent if it was
8300          * split, so we need to search down to the offset we have and then walk
8301          * forward until we find the backref we're looking for.
8302          */
8303         key.objectid = dback->owner;
8304         key.type = BTRFS_EXTENT_DATA_KEY;
8305         key.offset = dback->offset;
8306         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8307         if (ret < 0) {
8308                 fprintf(stderr, "Error looking up ref %d\n", ret);
8309                 return ret;
8310         }
8311
8312         while (1) {
8313                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8314                         ret = btrfs_next_leaf(root, path);
8315                         if (ret) {
8316                                 fprintf(stderr, "Couldn't find our ref, next\n");
8317                                 return -EINVAL;
8318                         }
8319                 }
8320                 leaf = path->nodes[0];
8321                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8322                 if (key.objectid != dback->owner ||
8323                     key.type != BTRFS_EXTENT_DATA_KEY) {
8324                         fprintf(stderr, "Couldn't find our ref, search\n");
8325                         return -EINVAL;
8326                 }
8327                 fi = btrfs_item_ptr(leaf, path->slots[0],
8328                                     struct btrfs_file_extent_item);
8329                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8330                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8331
8332                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8333                         break;
8334                 path->slots[0]++;
8335         }
8336
8337         btrfs_release_path(path);
8338
8339         trans = btrfs_start_transaction(root, 1);
8340         if (IS_ERR(trans))
8341                 return PTR_ERR(trans);
8342
8343         /*
8344          * Ok we have the key of the file extent we want to fix, now we can cow
8345          * down to the thing and fix it.
8346          */
8347         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8348         if (ret < 0) {
8349                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8350                         key.objectid, key.type, key.offset, ret);
8351                 goto out;
8352         }
8353         if (ret > 0) {
8354                 fprintf(stderr, "Well that's odd, we just found this key "
8355                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8356                         key.offset);
8357                 ret = -EINVAL;
8358                 goto out;
8359         }
8360         leaf = path->nodes[0];
8361         fi = btrfs_item_ptr(leaf, path->slots[0],
8362                             struct btrfs_file_extent_item);
8363
8364         if (btrfs_file_extent_compression(leaf, fi) &&
8365             dback->disk_bytenr != entry->bytenr) {
8366                 fprintf(stderr, "Ref doesn't match the record start and is "
8367                         "compressed, please take a btrfs-image of this file "
8368                         "system and send it to a btrfs developer so they can "
8369                         "complete this functionality for bytenr %Lu\n",
8370                         dback->disk_bytenr);
8371                 ret = -EINVAL;
8372                 goto out;
8373         }
8374
8375         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8376                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8377         } else if (dback->disk_bytenr > entry->bytenr) {
8378                 u64 off_diff, offset;
8379
8380                 off_diff = dback->disk_bytenr - entry->bytenr;
8381                 offset = btrfs_file_extent_offset(leaf, fi);
8382                 if (dback->disk_bytenr + offset +
8383                     btrfs_file_extent_num_bytes(leaf, fi) >
8384                     entry->bytenr + entry->bytes) {
8385                         fprintf(stderr, "Ref is past the entry end, please "
8386                                 "take a btrfs-image of this file system and "
8387                                 "send it to a btrfs developer, ref %Lu\n",
8388                                 dback->disk_bytenr);
8389                         ret = -EINVAL;
8390                         goto out;
8391                 }
8392                 offset += off_diff;
8393                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8394                 btrfs_set_file_extent_offset(leaf, fi, offset);
8395         } else if (dback->disk_bytenr < entry->bytenr) {
8396                 u64 offset;
8397
8398                 offset = btrfs_file_extent_offset(leaf, fi);
8399                 if (dback->disk_bytenr + offset < entry->bytenr) {
8400                         fprintf(stderr, "Ref is before the entry start, please"
8401                                 " take a btrfs-image of this file system and "
8402                                 "send it to a btrfs developer, ref %Lu\n",
8403                                 dback->disk_bytenr);
8404                         ret = -EINVAL;
8405                         goto out;
8406                 }
8407
8408                 offset += dback->disk_bytenr;
8409                 offset -= entry->bytenr;
8410                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8411                 btrfs_set_file_extent_offset(leaf, fi, offset);
8412         }
8413
8414         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8415
8416         /*
8417          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8418          * only do this if we aren't using compression, otherwise it's a
8419          * trickier case.
8420          */
8421         if (!btrfs_file_extent_compression(leaf, fi))
8422                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8423         else
8424                 printf("ram bytes may be wrong?\n");
8425         btrfs_mark_buffer_dirty(leaf);
8426 out:
8427         err = btrfs_commit_transaction(trans, root);
8428         btrfs_release_path(path);
8429         return ret ? ret : err;
8430 }
8431
8432 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8433                            struct extent_record *rec)
8434 {
8435         struct extent_backref *back;
8436         struct data_backref *dback;
8437         struct extent_entry *entry, *best = NULL;
8438         LIST_HEAD(entries);
8439         int nr_entries = 0;
8440         int broken_entries = 0;
8441         int ret = 0;
8442         short mismatch = 0;
8443
8444         /*
8445          * Metadata is easy and the backrefs should always agree on bytenr and
8446          * size, if not we've got bigger issues.
8447          */
8448         if (rec->metadata)
8449                 return 0;
8450
8451         list_for_each_entry(back, &rec->backrefs, list) {
8452                 if (back->full_backref || !back->is_data)
8453                         continue;
8454
8455                 dback = to_data_backref(back);
8456
8457                 /*
8458                  * We only pay attention to backrefs that we found a real
8459                  * backref for.
8460                  */
8461                 if (dback->found_ref == 0)
8462                         continue;
8463
8464                 /*
8465                  * For now we only catch when the bytes don't match, not the
8466                  * bytenr.  We can easily do this at the same time, but I want
8467                  * to have a fs image to test on before we just add repair
8468                  * functionality willy-nilly so we know we won't screw up the
8469                  * repair.
8470                  */
8471
8472                 entry = find_entry(&entries, dback->disk_bytenr,
8473                                    dback->bytes);
8474                 if (!entry) {
8475                         entry = malloc(sizeof(struct extent_entry));
8476                         if (!entry) {
8477                                 ret = -ENOMEM;
8478                                 goto out;
8479                         }
8480                         memset(entry, 0, sizeof(*entry));
8481                         entry->bytenr = dback->disk_bytenr;
8482                         entry->bytes = dback->bytes;
8483                         list_add_tail(&entry->list, &entries);
8484                         nr_entries++;
8485                 }
8486
8487                 /*
8488                  * If we only have on entry we may think the entries agree when
8489                  * in reality they don't so we have to do some extra checking.
8490                  */
8491                 if (dback->disk_bytenr != rec->start ||
8492                     dback->bytes != rec->nr || back->broken)
8493                         mismatch = 1;
8494
8495                 if (back->broken) {
8496                         entry->broken++;
8497                         broken_entries++;
8498                 }
8499
8500                 entry->count++;
8501         }
8502
8503         /* Yay all the backrefs agree, carry on good sir */
8504         if (nr_entries <= 1 && !mismatch)
8505                 goto out;
8506
8507         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8508                 "%Lu\n", rec->start);
8509
8510         /*
8511          * First we want to see if the backrefs can agree amongst themselves who
8512          * is right, so figure out which one of the entries has the highest
8513          * count.
8514          */
8515         best = find_most_right_entry(&entries);
8516
8517         /*
8518          * Ok so we may have an even split between what the backrefs think, so
8519          * this is where we use the extent ref to see what it thinks.
8520          */
8521         if (!best) {
8522                 entry = find_entry(&entries, rec->start, rec->nr);
8523                 if (!entry && (!broken_entries || !rec->found_rec)) {
8524                         fprintf(stderr, "Backrefs don't agree with each other "
8525                                 "and extent record doesn't agree with anybody,"
8526                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8527                                 rec->start, rec->nr);
8528                         ret = -EINVAL;
8529                         goto out;
8530                 } else if (!entry) {
8531                         /*
8532                          * Ok our backrefs were broken, we'll assume this is the
8533                          * correct value and add an entry for this range.
8534                          */
8535                         entry = malloc(sizeof(struct extent_entry));
8536                         if (!entry) {
8537                                 ret = -ENOMEM;
8538                                 goto out;
8539                         }
8540                         memset(entry, 0, sizeof(*entry));
8541                         entry->bytenr = rec->start;
8542                         entry->bytes = rec->nr;
8543                         list_add_tail(&entry->list, &entries);
8544                         nr_entries++;
8545                 }
8546                 entry->count++;
8547                 best = find_most_right_entry(&entries);
8548                 if (!best) {
8549                         fprintf(stderr, "Backrefs and extent record evenly "
8550                                 "split on who is right, this is going to "
8551                                 "require user input to fix bytenr %Lu bytes "
8552                                 "%Lu\n", rec->start, rec->nr);
8553                         ret = -EINVAL;
8554                         goto out;
8555                 }
8556         }
8557
8558         /*
8559          * I don't think this can happen currently as we'll abort() if we catch
8560          * this case higher up, but in case somebody removes that we still can't
8561          * deal with it properly here yet, so just bail out of that's the case.
8562          */
8563         if (best->bytenr != rec->start) {
8564                 fprintf(stderr, "Extent start and backref starts don't match, "
8565                         "please use btrfs-image on this file system and send "
8566                         "it to a btrfs developer so they can make fsck fix "
8567                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8568                         rec->start, rec->nr);
8569                 ret = -EINVAL;
8570                 goto out;
8571         }
8572
8573         /*
8574          * Ok great we all agreed on an extent record, let's go find the real
8575          * references and fix up the ones that don't match.
8576          */
8577         list_for_each_entry(back, &rec->backrefs, list) {
8578                 if (back->full_backref || !back->is_data)
8579                         continue;
8580
8581                 dback = to_data_backref(back);
8582
8583                 /*
8584                  * Still ignoring backrefs that don't have a real ref attached
8585                  * to them.
8586                  */
8587                 if (dback->found_ref == 0)
8588                         continue;
8589
8590                 if (dback->bytes == best->bytes &&
8591                     dback->disk_bytenr == best->bytenr)
8592                         continue;
8593
8594                 ret = repair_ref(info, path, dback, best);
8595                 if (ret)
8596                         goto out;
8597         }
8598
8599         /*
8600          * Ok we messed with the actual refs, which means we need to drop our
8601          * entire cache and go back and rescan.  I know this is a huge pain and
8602          * adds a lot of extra work, but it's the only way to be safe.  Once all
8603          * the backrefs agree we may not need to do anything to the extent
8604          * record itself.
8605          */
8606         ret = -EAGAIN;
8607 out:
8608         while (!list_empty(&entries)) {
8609                 entry = list_entry(entries.next, struct extent_entry, list);
8610                 list_del_init(&entry->list);
8611                 free(entry);
8612         }
8613         return ret;
8614 }
8615
8616 static int process_duplicates(struct cache_tree *extent_cache,
8617                               struct extent_record *rec)
8618 {
8619         struct extent_record *good, *tmp;
8620         struct cache_extent *cache;
8621         int ret;
8622
8623         /*
8624          * If we found a extent record for this extent then return, or if we
8625          * have more than one duplicate we are likely going to need to delete
8626          * something.
8627          */
8628         if (rec->found_rec || rec->num_duplicates > 1)
8629                 return 0;
8630
8631         /* Shouldn't happen but just in case */
8632         BUG_ON(!rec->num_duplicates);
8633
8634         /*
8635          * So this happens if we end up with a backref that doesn't match the
8636          * actual extent entry.  So either the backref is bad or the extent
8637          * entry is bad.  Either way we want to have the extent_record actually
8638          * reflect what we found in the extent_tree, so we need to take the
8639          * duplicate out and use that as the extent_record since the only way we
8640          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8641          */
8642         remove_cache_extent(extent_cache, &rec->cache);
8643
8644         good = to_extent_record(rec->dups.next);
8645         list_del_init(&good->list);
8646         INIT_LIST_HEAD(&good->backrefs);
8647         INIT_LIST_HEAD(&good->dups);
8648         good->cache.start = good->start;
8649         good->cache.size = good->nr;
8650         good->content_checked = 0;
8651         good->owner_ref_checked = 0;
8652         good->num_duplicates = 0;
8653         good->refs = rec->refs;
8654         list_splice_init(&rec->backrefs, &good->backrefs);
8655         while (1) {
8656                 cache = lookup_cache_extent(extent_cache, good->start,
8657                                             good->nr);
8658                 if (!cache)
8659                         break;
8660                 tmp = container_of(cache, struct extent_record, cache);
8661
8662                 /*
8663                  * If we find another overlapping extent and it's found_rec is
8664                  * set then it's a duplicate and we need to try and delete
8665                  * something.
8666                  */
8667                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8668                         if (list_empty(&good->list))
8669                                 list_add_tail(&good->list,
8670                                               &duplicate_extents);
8671                         good->num_duplicates += tmp->num_duplicates + 1;
8672                         list_splice_init(&tmp->dups, &good->dups);
8673                         list_del_init(&tmp->list);
8674                         list_add_tail(&tmp->list, &good->dups);
8675                         remove_cache_extent(extent_cache, &tmp->cache);
8676                         continue;
8677                 }
8678
8679                 /*
8680                  * Ok we have another non extent item backed extent rec, so lets
8681                  * just add it to this extent and carry on like we did above.
8682                  */
8683                 good->refs += tmp->refs;
8684                 list_splice_init(&tmp->backrefs, &good->backrefs);
8685                 remove_cache_extent(extent_cache, &tmp->cache);
8686                 free(tmp);
8687         }
8688         ret = insert_cache_extent(extent_cache, &good->cache);
8689         BUG_ON(ret);
8690         free(rec);
8691         return good->num_duplicates ? 0 : 1;
8692 }
8693
8694 static int delete_duplicate_records(struct btrfs_root *root,
8695                                     struct extent_record *rec)
8696 {
8697         struct btrfs_trans_handle *trans;
8698         LIST_HEAD(delete_list);
8699         struct btrfs_path path;
8700         struct extent_record *tmp, *good, *n;
8701         int nr_del = 0;
8702         int ret = 0, err;
8703         struct btrfs_key key;
8704
8705         btrfs_init_path(&path);
8706
8707         good = rec;
8708         /* Find the record that covers all of the duplicates. */
8709         list_for_each_entry(tmp, &rec->dups, list) {
8710                 if (good->start < tmp->start)
8711                         continue;
8712                 if (good->nr > tmp->nr)
8713                         continue;
8714
8715                 if (tmp->start + tmp->nr < good->start + good->nr) {
8716                         fprintf(stderr, "Ok we have overlapping extents that "
8717                                 "aren't completely covered by each other, this "
8718                                 "is going to require more careful thought.  "
8719                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8720                                 tmp->start, tmp->nr, good->start, good->nr);
8721                         abort();
8722                 }
8723                 good = tmp;
8724         }
8725
8726         if (good != rec)
8727                 list_add_tail(&rec->list, &delete_list);
8728
8729         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8730                 if (tmp == good)
8731                         continue;
8732                 list_move_tail(&tmp->list, &delete_list);
8733         }
8734
8735         root = root->fs_info->extent_root;
8736         trans = btrfs_start_transaction(root, 1);
8737         if (IS_ERR(trans)) {
8738                 ret = PTR_ERR(trans);
8739                 goto out;
8740         }
8741
8742         list_for_each_entry(tmp, &delete_list, list) {
8743                 if (tmp->found_rec == 0)
8744                         continue;
8745                 key.objectid = tmp->start;
8746                 key.type = BTRFS_EXTENT_ITEM_KEY;
8747                 key.offset = tmp->nr;
8748
8749                 /* Shouldn't happen but just in case */
8750                 if (tmp->metadata) {
8751                         fprintf(stderr, "Well this shouldn't happen, extent "
8752                                 "record overlaps but is metadata? "
8753                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8754                         abort();
8755                 }
8756
8757                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8758                 if (ret) {
8759                         if (ret > 0)
8760                                 ret = -EINVAL;
8761                         break;
8762                 }
8763                 ret = btrfs_del_item(trans, root, &path);
8764                 if (ret)
8765                         break;
8766                 btrfs_release_path(&path);
8767                 nr_del++;
8768         }
8769         err = btrfs_commit_transaction(trans, root);
8770         if (err && !ret)
8771                 ret = err;
8772 out:
8773         while (!list_empty(&delete_list)) {
8774                 tmp = to_extent_record(delete_list.next);
8775                 list_del_init(&tmp->list);
8776                 if (tmp == rec)
8777                         continue;
8778                 free(tmp);
8779         }
8780
8781         while (!list_empty(&rec->dups)) {
8782                 tmp = to_extent_record(rec->dups.next);
8783                 list_del_init(&tmp->list);
8784                 free(tmp);
8785         }
8786
8787         btrfs_release_path(&path);
8788
8789         if (!ret && !nr_del)
8790                 rec->num_duplicates = 0;
8791
8792         return ret ? ret : nr_del;
8793 }
8794
8795 static int find_possible_backrefs(struct btrfs_fs_info *info,
8796                                   struct btrfs_path *path,
8797                                   struct cache_tree *extent_cache,
8798                                   struct extent_record *rec)
8799 {
8800         struct btrfs_root *root;
8801         struct extent_backref *back;
8802         struct data_backref *dback;
8803         struct cache_extent *cache;
8804         struct btrfs_file_extent_item *fi;
8805         struct btrfs_key key;
8806         u64 bytenr, bytes;
8807         int ret;
8808
8809         list_for_each_entry(back, &rec->backrefs, list) {
8810                 /* Don't care about full backrefs (poor unloved backrefs) */
8811                 if (back->full_backref || !back->is_data)
8812                         continue;
8813
8814                 dback = to_data_backref(back);
8815
8816                 /* We found this one, we don't need to do a lookup */
8817                 if (dback->found_ref)
8818                         continue;
8819
8820                 key.objectid = dback->root;
8821                 key.type = BTRFS_ROOT_ITEM_KEY;
8822                 key.offset = (u64)-1;
8823
8824                 root = btrfs_read_fs_root(info, &key);
8825
8826                 /* No root, definitely a bad ref, skip */
8827                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8828                         continue;
8829                 /* Other err, exit */
8830                 if (IS_ERR(root))
8831                         return PTR_ERR(root);
8832
8833                 key.objectid = dback->owner;
8834                 key.type = BTRFS_EXTENT_DATA_KEY;
8835                 key.offset = dback->offset;
8836                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8837                 if (ret) {
8838                         btrfs_release_path(path);
8839                         if (ret < 0)
8840                                 return ret;
8841                         /* Didn't find it, we can carry on */
8842                         ret = 0;
8843                         continue;
8844                 }
8845
8846                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8847                                     struct btrfs_file_extent_item);
8848                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8849                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8850                 btrfs_release_path(path);
8851                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8852                 if (cache) {
8853                         struct extent_record *tmp;
8854                         tmp = container_of(cache, struct extent_record, cache);
8855
8856                         /*
8857                          * If we found an extent record for the bytenr for this
8858                          * particular backref then we can't add it to our
8859                          * current extent record.  We only want to add backrefs
8860                          * that don't have a corresponding extent item in the
8861                          * extent tree since they likely belong to this record
8862                          * and we need to fix it if it doesn't match bytenrs.
8863                          */
8864                         if  (tmp->found_rec)
8865                                 continue;
8866                 }
8867
8868                 dback->found_ref += 1;
8869                 dback->disk_bytenr = bytenr;
8870                 dback->bytes = bytes;
8871
8872                 /*
8873                  * Set this so the verify backref code knows not to trust the
8874                  * values in this backref.
8875                  */
8876                 back->broken = 1;
8877         }
8878
8879         return 0;
8880 }
8881
8882 /*
8883  * Record orphan data ref into corresponding root.
8884  *
8885  * Return 0 if the extent item contains data ref and recorded.
8886  * Return 1 if the extent item contains no useful data ref
8887  *   On that case, it may contains only shared_dataref or metadata backref
8888  *   or the file extent exists(this should be handled by the extent bytenr
8889  *   recovery routine)
8890  * Return <0 if something goes wrong.
8891  */
8892 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8893                                       struct extent_record *rec)
8894 {
8895         struct btrfs_key key;
8896         struct btrfs_root *dest_root;
8897         struct extent_backref *back;
8898         struct data_backref *dback;
8899         struct orphan_data_extent *orphan;
8900         struct btrfs_path path;
8901         int recorded_data_ref = 0;
8902         int ret = 0;
8903
8904         if (rec->metadata)
8905                 return 1;
8906         btrfs_init_path(&path);
8907         list_for_each_entry(back, &rec->backrefs, list) {
8908                 if (back->full_backref || !back->is_data ||
8909                     !back->found_extent_tree)
8910                         continue;
8911                 dback = to_data_backref(back);
8912                 if (dback->found_ref)
8913                         continue;
8914                 key.objectid = dback->root;
8915                 key.type = BTRFS_ROOT_ITEM_KEY;
8916                 key.offset = (u64)-1;
8917
8918                 dest_root = btrfs_read_fs_root(fs_info, &key);
8919
8920                 /* For non-exist root we just skip it */
8921                 if (IS_ERR(dest_root) || !dest_root)
8922                         continue;
8923
8924                 key.objectid = dback->owner;
8925                 key.type = BTRFS_EXTENT_DATA_KEY;
8926                 key.offset = dback->offset;
8927
8928                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8929                 btrfs_release_path(&path);
8930                 /*
8931                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8932                  * we need to record it for inode/file extent rebuild.
8933                  * For ret > 0, we record it only for file extent rebuild.
8934                  * For ret == 0, the file extent exists but only bytenr
8935                  * mismatch, let the original bytenr fix routine to handle,
8936                  * don't record it.
8937                  */
8938                 if (ret == 0)
8939                         continue;
8940                 ret = 0;
8941                 orphan = malloc(sizeof(*orphan));
8942                 if (!orphan) {
8943                         ret = -ENOMEM;
8944                         goto out;
8945                 }
8946                 INIT_LIST_HEAD(&orphan->list);
8947                 orphan->root = dback->root;
8948                 orphan->objectid = dback->owner;
8949                 orphan->offset = dback->offset;
8950                 orphan->disk_bytenr = rec->cache.start;
8951                 orphan->disk_len = rec->cache.size;
8952                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8953                 recorded_data_ref = 1;
8954         }
8955 out:
8956         btrfs_release_path(&path);
8957         if (!ret)
8958                 return !recorded_data_ref;
8959         else
8960                 return ret;
8961 }
8962
8963 /*
8964  * when an incorrect extent item is found, this will delete
8965  * all of the existing entries for it and recreate them
8966  * based on what the tree scan found.
8967  */
8968 static int fixup_extent_refs(struct btrfs_fs_info *info,
8969                              struct cache_tree *extent_cache,
8970                              struct extent_record *rec)
8971 {
8972         struct btrfs_trans_handle *trans = NULL;
8973         int ret;
8974         struct btrfs_path path;
8975         struct list_head *cur = rec->backrefs.next;
8976         struct cache_extent *cache;
8977         struct extent_backref *back;
8978         int allocated = 0;
8979         u64 flags = 0;
8980
8981         if (rec->flag_block_full_backref)
8982                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8983
8984         btrfs_init_path(&path);
8985         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8986                 /*
8987                  * Sometimes the backrefs themselves are so broken they don't
8988                  * get attached to any meaningful rec, so first go back and
8989                  * check any of our backrefs that we couldn't find and throw
8990                  * them into the list if we find the backref so that
8991                  * verify_backrefs can figure out what to do.
8992                  */
8993                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8994                 if (ret < 0)
8995                         goto out;
8996         }
8997
8998         /* step one, make sure all of the backrefs agree */
8999         ret = verify_backrefs(info, &path, rec);
9000         if (ret < 0)
9001                 goto out;
9002
9003         trans = btrfs_start_transaction(info->extent_root, 1);
9004         if (IS_ERR(trans)) {
9005                 ret = PTR_ERR(trans);
9006                 goto out;
9007         }
9008
9009         /* step two, delete all the existing records */
9010         ret = delete_extent_records(trans, info->extent_root, &path,
9011                                     rec->start);
9012
9013         if (ret < 0)
9014                 goto out;
9015
9016         /* was this block corrupt?  If so, don't add references to it */
9017         cache = lookup_cache_extent(info->corrupt_blocks,
9018                                     rec->start, rec->max_size);
9019         if (cache) {
9020                 ret = 0;
9021                 goto out;
9022         }
9023
9024         /* step three, recreate all the refs we did find */
9025         while(cur != &rec->backrefs) {
9026                 back = to_extent_backref(cur);
9027                 cur = cur->next;
9028
9029                 /*
9030                  * if we didn't find any references, don't create a
9031                  * new extent record
9032                  */
9033                 if (!back->found_ref)
9034                         continue;
9035
9036                 rec->bad_full_backref = 0;
9037                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9038                 allocated = 1;
9039
9040                 if (ret)
9041                         goto out;
9042         }
9043 out:
9044         if (trans) {
9045                 int err = btrfs_commit_transaction(trans, info->extent_root);
9046                 if (!ret)
9047                         ret = err;
9048         }
9049
9050         if (!ret)
9051                 fprintf(stderr, "Repaired extent references for %llu\n",
9052                                 (unsigned long long)rec->start);
9053
9054         btrfs_release_path(&path);
9055         return ret;
9056 }
9057
9058 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9059                               struct extent_record *rec)
9060 {
9061         struct btrfs_trans_handle *trans;
9062         struct btrfs_root *root = fs_info->extent_root;
9063         struct btrfs_path path;
9064         struct btrfs_extent_item *ei;
9065         struct btrfs_key key;
9066         u64 flags;
9067         int ret = 0;
9068
9069         key.objectid = rec->start;
9070         if (rec->metadata) {
9071                 key.type = BTRFS_METADATA_ITEM_KEY;
9072                 key.offset = rec->info_level;
9073         } else {
9074                 key.type = BTRFS_EXTENT_ITEM_KEY;
9075                 key.offset = rec->max_size;
9076         }
9077
9078         trans = btrfs_start_transaction(root, 0);
9079         if (IS_ERR(trans))
9080                 return PTR_ERR(trans);
9081
9082         btrfs_init_path(&path);
9083         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9084         if (ret < 0) {
9085                 btrfs_release_path(&path);
9086                 btrfs_commit_transaction(trans, root);
9087                 return ret;
9088         } else if (ret) {
9089                 fprintf(stderr, "Didn't find extent for %llu\n",
9090                         (unsigned long long)rec->start);
9091                 btrfs_release_path(&path);
9092                 btrfs_commit_transaction(trans, root);
9093                 return -ENOENT;
9094         }
9095
9096         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9097                             struct btrfs_extent_item);
9098         flags = btrfs_extent_flags(path.nodes[0], ei);
9099         if (rec->flag_block_full_backref) {
9100                 fprintf(stderr, "setting full backref on %llu\n",
9101                         (unsigned long long)key.objectid);
9102                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9103         } else {
9104                 fprintf(stderr, "clearing full backref on %llu\n",
9105                         (unsigned long long)key.objectid);
9106                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9107         }
9108         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9109         btrfs_mark_buffer_dirty(path.nodes[0]);
9110         btrfs_release_path(&path);
9111         ret = btrfs_commit_transaction(trans, root);
9112         if (!ret)
9113                 fprintf(stderr, "Repaired extent flags for %llu\n",
9114                                 (unsigned long long)rec->start);
9115
9116         return ret;
9117 }
9118
9119 /* right now we only prune from the extent allocation tree */
9120 static int prune_one_block(struct btrfs_trans_handle *trans,
9121                            struct btrfs_fs_info *info,
9122                            struct btrfs_corrupt_block *corrupt)
9123 {
9124         int ret;
9125         struct btrfs_path path;
9126         struct extent_buffer *eb;
9127         u64 found;
9128         int slot;
9129         int nritems;
9130         int level = corrupt->level + 1;
9131
9132         btrfs_init_path(&path);
9133 again:
9134         /* we want to stop at the parent to our busted block */
9135         path.lowest_level = level;
9136
9137         ret = btrfs_search_slot(trans, info->extent_root,
9138                                 &corrupt->key, &path, -1, 1);
9139
9140         if (ret < 0)
9141                 goto out;
9142
9143         eb = path.nodes[level];
9144         if (!eb) {
9145                 ret = -ENOENT;
9146                 goto out;
9147         }
9148
9149         /*
9150          * hopefully the search gave us the block we want to prune,
9151          * lets try that first
9152          */
9153         slot = path.slots[level];
9154         found =  btrfs_node_blockptr(eb, slot);
9155         if (found == corrupt->cache.start)
9156                 goto del_ptr;
9157
9158         nritems = btrfs_header_nritems(eb);
9159
9160         /* the search failed, lets scan this node and hope we find it */
9161         for (slot = 0; slot < nritems; slot++) {
9162                 found =  btrfs_node_blockptr(eb, slot);
9163                 if (found == corrupt->cache.start)
9164                         goto del_ptr;
9165         }
9166         /*
9167          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9168          * to this block
9169          */
9170         if (eb == info->extent_root->node) {
9171                 ret = -ENOENT;
9172                 goto out;
9173         } else {
9174                 level++;
9175                 btrfs_release_path(&path);
9176                 goto again;
9177         }
9178
9179 del_ptr:
9180         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9181         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9182
9183 out:
9184         btrfs_release_path(&path);
9185         return ret;
9186 }
9187
9188 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9189 {
9190         struct btrfs_trans_handle *trans = NULL;
9191         struct cache_extent *cache;
9192         struct btrfs_corrupt_block *corrupt;
9193
9194         while (1) {
9195                 cache = search_cache_extent(info->corrupt_blocks, 0);
9196                 if (!cache)
9197                         break;
9198                 if (!trans) {
9199                         trans = btrfs_start_transaction(info->extent_root, 1);
9200                         if (IS_ERR(trans))
9201                                 return PTR_ERR(trans);
9202                 }
9203                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9204                 prune_one_block(trans, info, corrupt);
9205                 remove_cache_extent(info->corrupt_blocks, cache);
9206         }
9207         if (trans)
9208                 return btrfs_commit_transaction(trans, info->extent_root);
9209         return 0;
9210 }
9211
9212 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9213 {
9214         struct btrfs_block_group_cache *cache;
9215         u64 start, end;
9216         int ret;
9217
9218         while (1) {
9219                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9220                                             &start, &end, EXTENT_DIRTY);
9221                 if (ret)
9222                         break;
9223                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9224         }
9225
9226         start = 0;
9227         while (1) {
9228                 cache = btrfs_lookup_first_block_group(fs_info, start);
9229                 if (!cache)
9230                         break;
9231                 if (cache->cached)
9232                         cache->cached = 0;
9233                 start = cache->key.objectid + cache->key.offset;
9234         }
9235 }
9236
9237 static int check_extent_refs(struct btrfs_root *root,
9238                              struct cache_tree *extent_cache)
9239 {
9240         struct extent_record *rec;
9241         struct cache_extent *cache;
9242         int ret = 0;
9243         int had_dups = 0;
9244
9245         if (repair) {
9246                 /*
9247                  * if we're doing a repair, we have to make sure
9248                  * we don't allocate from the problem extents.
9249                  * In the worst case, this will be all the
9250                  * extents in the FS
9251                  */
9252                 cache = search_cache_extent(extent_cache, 0);
9253                 while(cache) {
9254                         rec = container_of(cache, struct extent_record, cache);
9255                         set_extent_dirty(root->fs_info->excluded_extents,
9256                                          rec->start,
9257                                          rec->start + rec->max_size - 1);
9258                         cache = next_cache_extent(cache);
9259                 }
9260
9261                 /* pin down all the corrupted blocks too */
9262                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9263                 while(cache) {
9264                         set_extent_dirty(root->fs_info->excluded_extents,
9265                                          cache->start,
9266                                          cache->start + cache->size - 1);
9267                         cache = next_cache_extent(cache);
9268                 }
9269                 prune_corrupt_blocks(root->fs_info);
9270                 reset_cached_block_groups(root->fs_info);
9271         }
9272
9273         reset_cached_block_groups(root->fs_info);
9274
9275         /*
9276          * We need to delete any duplicate entries we find first otherwise we
9277          * could mess up the extent tree when we have backrefs that actually
9278          * belong to a different extent item and not the weird duplicate one.
9279          */
9280         while (repair && !list_empty(&duplicate_extents)) {
9281                 rec = to_extent_record(duplicate_extents.next);
9282                 list_del_init(&rec->list);
9283
9284                 /* Sometimes we can find a backref before we find an actual
9285                  * extent, so we need to process it a little bit to see if there
9286                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9287                  * if this is a backref screwup.  If we need to delete stuff
9288                  * process_duplicates() will return 0, otherwise it will return
9289                  * 1 and we
9290                  */
9291                 if (process_duplicates(extent_cache, rec))
9292                         continue;
9293                 ret = delete_duplicate_records(root, rec);
9294                 if (ret < 0)
9295                         return ret;
9296                 /*
9297                  * delete_duplicate_records will return the number of entries
9298                  * deleted, so if it's greater than 0 then we know we actually
9299                  * did something and we need to remove.
9300                  */
9301                 if (ret)
9302                         had_dups = 1;
9303         }
9304
9305         if (had_dups)
9306                 return -EAGAIN;
9307
9308         while(1) {
9309                 int cur_err = 0;
9310                 int fix = 0;
9311
9312                 cache = search_cache_extent(extent_cache, 0);
9313                 if (!cache)
9314                         break;
9315                 rec = container_of(cache, struct extent_record, cache);
9316                 if (rec->num_duplicates) {
9317                         fprintf(stderr, "extent item %llu has multiple extent "
9318                                 "items\n", (unsigned long long)rec->start);
9319                         cur_err = 1;
9320                 }
9321
9322                 if (rec->refs != rec->extent_item_refs) {
9323                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9324                                 (unsigned long long)rec->start,
9325                                 (unsigned long long)rec->nr);
9326                         fprintf(stderr, "extent item %llu, found %llu\n",
9327                                 (unsigned long long)rec->extent_item_refs,
9328                                 (unsigned long long)rec->refs);
9329                         ret = record_orphan_data_extents(root->fs_info, rec);
9330                         if (ret < 0)
9331                                 goto repair_abort;
9332                         fix = ret;
9333                         cur_err = 1;
9334                 }
9335                 if (all_backpointers_checked(rec, 1)) {
9336                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9337                                 (unsigned long long)rec->start,
9338                                 (unsigned long long)rec->nr);
9339                         fix = 1;
9340                         cur_err = 1;
9341                 }
9342                 if (!rec->owner_ref_checked) {
9343                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9344                                 (unsigned long long)rec->start,
9345                                 (unsigned long long)rec->nr);
9346                         fix = 1;
9347                         cur_err = 1;
9348                 }
9349
9350                 if (repair && fix) {
9351                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9352                         if (ret)
9353                                 goto repair_abort;
9354                 }
9355
9356
9357                 if (rec->bad_full_backref) {
9358                         fprintf(stderr, "bad full backref, on [%llu]\n",
9359                                 (unsigned long long)rec->start);
9360                         if (repair) {
9361                                 ret = fixup_extent_flags(root->fs_info, rec);
9362                                 if (ret)
9363                                         goto repair_abort;
9364                                 fix = 1;
9365                         }
9366                         cur_err = 1;
9367                 }
9368                 /*
9369                  * Although it's not a extent ref's problem, we reuse this
9370                  * routine for error reporting.
9371                  * No repair function yet.
9372                  */
9373                 if (rec->crossing_stripes) {
9374                         fprintf(stderr,
9375                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9376                                 rec->start, rec->start + rec->max_size);
9377                         cur_err = 1;
9378                 }
9379
9380                 if (rec->wrong_chunk_type) {
9381                         fprintf(stderr,
9382                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9383                                 rec->start, rec->start + rec->max_size);
9384                         cur_err = 1;
9385                 }
9386
9387                 remove_cache_extent(extent_cache, cache);
9388                 free_all_extent_backrefs(rec);
9389                 if (!init_extent_tree && repair && (!cur_err || fix))
9390                         clear_extent_dirty(root->fs_info->excluded_extents,
9391                                            rec->start,
9392                                            rec->start + rec->max_size - 1);
9393                 free(rec);
9394         }
9395 repair_abort:
9396         if (repair) {
9397                 if (ret && ret != -EAGAIN) {
9398                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9399                         exit(1);
9400                 } else if (!ret) {
9401                         struct btrfs_trans_handle *trans;
9402
9403                         root = root->fs_info->extent_root;
9404                         trans = btrfs_start_transaction(root, 1);
9405                         if (IS_ERR(trans)) {
9406                                 ret = PTR_ERR(trans);
9407                                 goto repair_abort;
9408                         }
9409
9410                         btrfs_fix_block_accounting(trans, root);
9411                         ret = btrfs_commit_transaction(trans, root);
9412                         if (ret)
9413                                 goto repair_abort;
9414                 }
9415                 return ret;
9416         }
9417         return 0;
9418 }
9419
9420 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9421 {
9422         u64 stripe_size;
9423
9424         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9425                 stripe_size = length;
9426                 stripe_size /= num_stripes;
9427         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9428                 stripe_size = length * 2;
9429                 stripe_size /= num_stripes;
9430         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9431                 stripe_size = length;
9432                 stripe_size /= (num_stripes - 1);
9433         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9434                 stripe_size = length;
9435                 stripe_size /= (num_stripes - 2);
9436         } else {
9437                 stripe_size = length;
9438         }
9439         return stripe_size;
9440 }
9441
9442 /*
9443  * Check the chunk with its block group/dev list ref:
9444  * Return 0 if all refs seems valid.
9445  * Return 1 if part of refs seems valid, need later check for rebuild ref
9446  * like missing block group and needs to search extent tree to rebuild them.
9447  * Return -1 if essential refs are missing and unable to rebuild.
9448  */
9449 static int check_chunk_refs(struct chunk_record *chunk_rec,
9450                             struct block_group_tree *block_group_cache,
9451                             struct device_extent_tree *dev_extent_cache,
9452                             int silent)
9453 {
9454         struct cache_extent *block_group_item;
9455         struct block_group_record *block_group_rec;
9456         struct cache_extent *dev_extent_item;
9457         struct device_extent_record *dev_extent_rec;
9458         u64 devid;
9459         u64 offset;
9460         u64 length;
9461         int metadump_v2 = 0;
9462         int i;
9463         int ret = 0;
9464
9465         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9466                                                chunk_rec->offset,
9467                                                chunk_rec->length);
9468         if (block_group_item) {
9469                 block_group_rec = container_of(block_group_item,
9470                                                struct block_group_record,
9471                                                cache);
9472                 if (chunk_rec->length != block_group_rec->offset ||
9473                     chunk_rec->offset != block_group_rec->objectid ||
9474                     (!metadump_v2 &&
9475                      chunk_rec->type_flags != block_group_rec->flags)) {
9476                         if (!silent)
9477                                 fprintf(stderr,
9478                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9479                                         chunk_rec->objectid,
9480                                         chunk_rec->type,
9481                                         chunk_rec->offset,
9482                                         chunk_rec->length,
9483                                         chunk_rec->offset,
9484                                         chunk_rec->type_flags,
9485                                         block_group_rec->objectid,
9486                                         block_group_rec->type,
9487                                         block_group_rec->offset,
9488                                         block_group_rec->offset,
9489                                         block_group_rec->objectid,
9490                                         block_group_rec->flags);
9491                         ret = -1;
9492                 } else {
9493                         list_del_init(&block_group_rec->list);
9494                         chunk_rec->bg_rec = block_group_rec;
9495                 }
9496         } else {
9497                 if (!silent)
9498                         fprintf(stderr,
9499                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9500                                 chunk_rec->objectid,
9501                                 chunk_rec->type,
9502                                 chunk_rec->offset,
9503                                 chunk_rec->length,
9504                                 chunk_rec->offset,
9505                                 chunk_rec->type_flags);
9506                 ret = 1;
9507         }
9508
9509         if (metadump_v2)
9510                 return ret;
9511
9512         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9513                                     chunk_rec->num_stripes);
9514         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9515                 devid = chunk_rec->stripes[i].devid;
9516                 offset = chunk_rec->stripes[i].offset;
9517                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9518                                                        devid, offset, length);
9519                 if (dev_extent_item) {
9520                         dev_extent_rec = container_of(dev_extent_item,
9521                                                 struct device_extent_record,
9522                                                 cache);
9523                         if (dev_extent_rec->objectid != devid ||
9524                             dev_extent_rec->offset != offset ||
9525                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9526                             dev_extent_rec->length != length) {
9527                                 if (!silent)
9528                                         fprintf(stderr,
9529                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9530                                                 chunk_rec->objectid,
9531                                                 chunk_rec->type,
9532                                                 chunk_rec->offset,
9533                                                 chunk_rec->stripes[i].devid,
9534                                                 chunk_rec->stripes[i].offset,
9535                                                 dev_extent_rec->objectid,
9536                                                 dev_extent_rec->offset,
9537                                                 dev_extent_rec->length);
9538                                 ret = -1;
9539                         } else {
9540                                 list_move(&dev_extent_rec->chunk_list,
9541                                           &chunk_rec->dextents);
9542                         }
9543                 } else {
9544                         if (!silent)
9545                                 fprintf(stderr,
9546                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9547                                         chunk_rec->objectid,
9548                                         chunk_rec->type,
9549                                         chunk_rec->offset,
9550                                         chunk_rec->stripes[i].devid,
9551                                         chunk_rec->stripes[i].offset);
9552                         ret = -1;
9553                 }
9554         }
9555         return ret;
9556 }
9557
9558 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9559 int check_chunks(struct cache_tree *chunk_cache,
9560                  struct block_group_tree *block_group_cache,
9561                  struct device_extent_tree *dev_extent_cache,
9562                  struct list_head *good, struct list_head *bad,
9563                  struct list_head *rebuild, int silent)
9564 {
9565         struct cache_extent *chunk_item;
9566         struct chunk_record *chunk_rec;
9567         struct block_group_record *bg_rec;
9568         struct device_extent_record *dext_rec;
9569         int err;
9570         int ret = 0;
9571
9572         chunk_item = first_cache_extent(chunk_cache);
9573         while (chunk_item) {
9574                 chunk_rec = container_of(chunk_item, struct chunk_record,
9575                                          cache);
9576                 err = check_chunk_refs(chunk_rec, block_group_cache,
9577                                        dev_extent_cache, silent);
9578                 if (err < 0)
9579                         ret = err;
9580                 if (err == 0 && good)
9581                         list_add_tail(&chunk_rec->list, good);
9582                 if (err > 0 && rebuild)
9583                         list_add_tail(&chunk_rec->list, rebuild);
9584                 if (err < 0 && bad)
9585                         list_add_tail(&chunk_rec->list, bad);
9586                 chunk_item = next_cache_extent(chunk_item);
9587         }
9588
9589         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9590                 if (!silent)
9591                         fprintf(stderr,
9592                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9593                                 bg_rec->objectid,
9594                                 bg_rec->offset,
9595                                 bg_rec->flags);
9596                 if (!ret)
9597                         ret = 1;
9598         }
9599
9600         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9601                             chunk_list) {
9602                 if (!silent)
9603                         fprintf(stderr,
9604                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9605                                 dext_rec->objectid,
9606                                 dext_rec->offset,
9607                                 dext_rec->length);
9608                 if (!ret)
9609                         ret = 1;
9610         }
9611         return ret;
9612 }
9613
9614
9615 static int check_device_used(struct device_record *dev_rec,
9616                              struct device_extent_tree *dext_cache)
9617 {
9618         struct cache_extent *cache;
9619         struct device_extent_record *dev_extent_rec;
9620         u64 total_byte = 0;
9621
9622         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9623         while (cache) {
9624                 dev_extent_rec = container_of(cache,
9625                                               struct device_extent_record,
9626                                               cache);
9627                 if (dev_extent_rec->objectid != dev_rec->devid)
9628                         break;
9629
9630                 list_del_init(&dev_extent_rec->device_list);
9631                 total_byte += dev_extent_rec->length;
9632                 cache = next_cache_extent(cache);
9633         }
9634
9635         if (total_byte != dev_rec->byte_used) {
9636                 fprintf(stderr,
9637                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9638                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9639                         dev_rec->type, dev_rec->offset);
9640                 return -1;
9641         } else {
9642                 return 0;
9643         }
9644 }
9645
9646 /* check btrfs_dev_item -> btrfs_dev_extent */
9647 static int check_devices(struct rb_root *dev_cache,
9648                          struct device_extent_tree *dev_extent_cache)
9649 {
9650         struct rb_node *dev_node;
9651         struct device_record *dev_rec;
9652         struct device_extent_record *dext_rec;
9653         int err;
9654         int ret = 0;
9655
9656         dev_node = rb_first(dev_cache);
9657         while (dev_node) {
9658                 dev_rec = container_of(dev_node, struct device_record, node);
9659                 err = check_device_used(dev_rec, dev_extent_cache);
9660                 if (err)
9661                         ret = err;
9662
9663                 dev_node = rb_next(dev_node);
9664         }
9665         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9666                             device_list) {
9667                 fprintf(stderr,
9668                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9669                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9670                 if (!ret)
9671                         ret = 1;
9672         }
9673         return ret;
9674 }
9675
9676 static int add_root_item_to_list(struct list_head *head,
9677                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9678                                   u8 level, u8 drop_level,
9679                                   int level_size, struct btrfs_key *drop_key)
9680 {
9681
9682         struct root_item_record *ri_rec;
9683         ri_rec = malloc(sizeof(*ri_rec));
9684         if (!ri_rec)
9685                 return -ENOMEM;
9686         ri_rec->bytenr = bytenr;
9687         ri_rec->objectid = objectid;
9688         ri_rec->level = level;
9689         ri_rec->level_size = level_size;
9690         ri_rec->drop_level = drop_level;
9691         ri_rec->last_snapshot = last_snapshot;
9692         if (drop_key)
9693                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9694         list_add_tail(&ri_rec->list, head);
9695
9696         return 0;
9697 }
9698
9699 static void free_root_item_list(struct list_head *list)
9700 {
9701         struct root_item_record *ri_rec;
9702
9703         while (!list_empty(list)) {
9704                 ri_rec = list_first_entry(list, struct root_item_record,
9705                                           list);
9706                 list_del_init(&ri_rec->list);
9707                 free(ri_rec);
9708         }
9709 }
9710
9711 static int deal_root_from_list(struct list_head *list,
9712                                struct btrfs_root *root,
9713                                struct block_info *bits,
9714                                int bits_nr,
9715                                struct cache_tree *pending,
9716                                struct cache_tree *seen,
9717                                struct cache_tree *reada,
9718                                struct cache_tree *nodes,
9719                                struct cache_tree *extent_cache,
9720                                struct cache_tree *chunk_cache,
9721                                struct rb_root *dev_cache,
9722                                struct block_group_tree *block_group_cache,
9723                                struct device_extent_tree *dev_extent_cache)
9724 {
9725         int ret = 0;
9726         u64 last;
9727
9728         while (!list_empty(list)) {
9729                 struct root_item_record *rec;
9730                 struct extent_buffer *buf;
9731                 rec = list_entry(list->next,
9732                                  struct root_item_record, list);
9733                 last = 0;
9734                 buf = read_tree_block(root->fs_info->tree_root,
9735                                       rec->bytenr, rec->level_size, 0);
9736                 if (!extent_buffer_uptodate(buf)) {
9737                         free_extent_buffer(buf);
9738                         ret = -EIO;
9739                         break;
9740                 }
9741                 ret = add_root_to_pending(buf, extent_cache, pending,
9742                                     seen, nodes, rec->objectid);
9743                 if (ret < 0)
9744                         break;
9745                 /*
9746                  * To rebuild extent tree, we need deal with snapshot
9747                  * one by one, otherwise we deal with node firstly which
9748                  * can maximize readahead.
9749                  */
9750                 while (1) {
9751                         ret = run_next_block(root, bits, bits_nr, &last,
9752                                              pending, seen, reada, nodes,
9753                                              extent_cache, chunk_cache,
9754                                              dev_cache, block_group_cache,
9755                                              dev_extent_cache, rec);
9756                         if (ret != 0)
9757                                 break;
9758                 }
9759                 free_extent_buffer(buf);
9760                 list_del(&rec->list);
9761                 free(rec);
9762                 if (ret < 0)
9763                         break;
9764         }
9765         while (ret >= 0) {
9766                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9767                                      reada, nodes, extent_cache, chunk_cache,
9768                                      dev_cache, block_group_cache,
9769                                      dev_extent_cache, NULL);
9770                 if (ret != 0) {
9771                         if (ret > 0)
9772                                 ret = 0;
9773                         break;
9774                 }
9775         }
9776         return ret;
9777 }
9778
9779 static int check_chunks_and_extents(struct btrfs_root *root)
9780 {
9781         struct rb_root dev_cache;
9782         struct cache_tree chunk_cache;
9783         struct block_group_tree block_group_cache;
9784         struct device_extent_tree dev_extent_cache;
9785         struct cache_tree extent_cache;
9786         struct cache_tree seen;
9787         struct cache_tree pending;
9788         struct cache_tree reada;
9789         struct cache_tree nodes;
9790         struct extent_io_tree excluded_extents;
9791         struct cache_tree corrupt_blocks;
9792         struct btrfs_path path;
9793         struct btrfs_key key;
9794         struct btrfs_key found_key;
9795         int ret, err = 0;
9796         struct block_info *bits;
9797         int bits_nr;
9798         struct extent_buffer *leaf;
9799         int slot;
9800         struct btrfs_root_item ri;
9801         struct list_head dropping_trees;
9802         struct list_head normal_trees;
9803         struct btrfs_root *root1;
9804         u64 objectid;
9805         u32 level_size;
9806         u8 level;
9807
9808         dev_cache = RB_ROOT;
9809         cache_tree_init(&chunk_cache);
9810         block_group_tree_init(&block_group_cache);
9811         device_extent_tree_init(&dev_extent_cache);
9812
9813         cache_tree_init(&extent_cache);
9814         cache_tree_init(&seen);
9815         cache_tree_init(&pending);
9816         cache_tree_init(&nodes);
9817         cache_tree_init(&reada);
9818         cache_tree_init(&corrupt_blocks);
9819         extent_io_tree_init(&excluded_extents);
9820         INIT_LIST_HEAD(&dropping_trees);
9821         INIT_LIST_HEAD(&normal_trees);
9822
9823         if (repair) {
9824                 root->fs_info->excluded_extents = &excluded_extents;
9825                 root->fs_info->fsck_extent_cache = &extent_cache;
9826                 root->fs_info->free_extent_hook = free_extent_hook;
9827                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9828         }
9829
9830         bits_nr = 1024;
9831         bits = malloc(bits_nr * sizeof(struct block_info));
9832         if (!bits) {
9833                 perror("malloc");
9834                 exit(1);
9835         }
9836
9837         if (ctx.progress_enabled) {
9838                 ctx.tp = TASK_EXTENTS;
9839                 task_start(ctx.info);
9840         }
9841
9842 again:
9843         root1 = root->fs_info->tree_root;
9844         level = btrfs_header_level(root1->node);
9845         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9846                                     root1->node->start, 0, level, 0,
9847                                     root1->nodesize, NULL);
9848         if (ret < 0)
9849                 goto out;
9850         root1 = root->fs_info->chunk_root;
9851         level = btrfs_header_level(root1->node);
9852         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9853                                     root1->node->start, 0, level, 0,
9854                                     root1->nodesize, NULL);
9855         if (ret < 0)
9856                 goto out;
9857         btrfs_init_path(&path);
9858         key.offset = 0;
9859         key.objectid = 0;
9860         key.type = BTRFS_ROOT_ITEM_KEY;
9861         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9862                                         &key, &path, 0, 0);
9863         if (ret < 0)
9864                 goto out;
9865         while(1) {
9866                 leaf = path.nodes[0];
9867                 slot = path.slots[0];
9868                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9869                         ret = btrfs_next_leaf(root, &path);
9870                         if (ret != 0)
9871                                 break;
9872                         leaf = path.nodes[0];
9873                         slot = path.slots[0];
9874                 }
9875                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9876                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9877                         unsigned long offset;
9878                         u64 last_snapshot;
9879
9880                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9881                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9882                         last_snapshot = btrfs_root_last_snapshot(&ri);
9883                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9884                                 level = btrfs_root_level(&ri);
9885                                 level_size = root->nodesize;
9886                                 ret = add_root_item_to_list(&normal_trees,
9887                                                 found_key.objectid,
9888                                                 btrfs_root_bytenr(&ri),
9889                                                 last_snapshot, level,
9890                                                 0, level_size, NULL);
9891                                 if (ret < 0)
9892                                         goto out;
9893                         } else {
9894                                 level = btrfs_root_level(&ri);
9895                                 level_size = root->nodesize;
9896                                 objectid = found_key.objectid;
9897                                 btrfs_disk_key_to_cpu(&found_key,
9898                                                       &ri.drop_progress);
9899                                 ret = add_root_item_to_list(&dropping_trees,
9900                                                 objectid,
9901                                                 btrfs_root_bytenr(&ri),
9902                                                 last_snapshot, level,
9903                                                 ri.drop_level,
9904                                                 level_size, &found_key);
9905                                 if (ret < 0)
9906                                         goto out;
9907                         }
9908                 }
9909                 path.slots[0]++;
9910         }
9911         btrfs_release_path(&path);
9912
9913         /*
9914          * check_block can return -EAGAIN if it fixes something, please keep
9915          * this in mind when dealing with return values from these functions, if
9916          * we get -EAGAIN we want to fall through and restart the loop.
9917          */
9918         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9919                                   &seen, &reada, &nodes, &extent_cache,
9920                                   &chunk_cache, &dev_cache, &block_group_cache,
9921                                   &dev_extent_cache);
9922         if (ret < 0) {
9923                 if (ret == -EAGAIN)
9924                         goto loop;
9925                 goto out;
9926         }
9927         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9928                                   &pending, &seen, &reada, &nodes,
9929                                   &extent_cache, &chunk_cache, &dev_cache,
9930                                   &block_group_cache, &dev_extent_cache);
9931         if (ret < 0) {
9932                 if (ret == -EAGAIN)
9933                         goto loop;
9934                 goto out;
9935         }
9936
9937         ret = check_chunks(&chunk_cache, &block_group_cache,
9938                            &dev_extent_cache, NULL, NULL, NULL, 0);
9939         if (ret) {
9940                 if (ret == -EAGAIN)
9941                         goto loop;
9942                 err = ret;
9943         }
9944
9945         ret = check_extent_refs(root, &extent_cache);
9946         if (ret < 0) {
9947                 if (ret == -EAGAIN)
9948                         goto loop;
9949                 goto out;
9950         }
9951
9952         ret = check_devices(&dev_cache, &dev_extent_cache);
9953         if (ret && err)
9954                 ret = err;
9955
9956 out:
9957         task_stop(ctx.info);
9958         if (repair) {
9959                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9960                 extent_io_tree_cleanup(&excluded_extents);
9961                 root->fs_info->fsck_extent_cache = NULL;
9962                 root->fs_info->free_extent_hook = NULL;
9963                 root->fs_info->corrupt_blocks = NULL;
9964                 root->fs_info->excluded_extents = NULL;
9965         }
9966         free(bits);
9967         free_chunk_cache_tree(&chunk_cache);
9968         free_device_cache_tree(&dev_cache);
9969         free_block_group_tree(&block_group_cache);
9970         free_device_extent_tree(&dev_extent_cache);
9971         free_extent_cache_tree(&seen);
9972         free_extent_cache_tree(&pending);
9973         free_extent_cache_tree(&reada);
9974         free_extent_cache_tree(&nodes);
9975         free_root_item_list(&normal_trees);
9976         free_root_item_list(&dropping_trees);
9977         return ret;
9978 loop:
9979         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9980         free_extent_cache_tree(&seen);
9981         free_extent_cache_tree(&pending);
9982         free_extent_cache_tree(&reada);
9983         free_extent_cache_tree(&nodes);
9984         free_chunk_cache_tree(&chunk_cache);
9985         free_block_group_tree(&block_group_cache);
9986         free_device_cache_tree(&dev_cache);
9987         free_device_extent_tree(&dev_extent_cache);
9988         free_extent_record_cache(&extent_cache);
9989         free_root_item_list(&normal_trees);
9990         free_root_item_list(&dropping_trees);
9991         extent_io_tree_cleanup(&excluded_extents);
9992         goto again;
9993 }
9994
9995 /*
9996  * Check backrefs of a tree block given by @bytenr or @eb.
9997  *
9998  * @root:       the root containing the @bytenr or @eb
9999  * @eb:         tree block extent buffer, can be NULL
10000  * @bytenr:     bytenr of the tree block to search
10001  * @level:      tree level of the tree block
10002  * @owner:      owner of the tree block
10003  *
10004  * Return >0 for any error found and output error message
10005  * Return 0 for no error found
10006  */
10007 static int check_tree_block_ref(struct btrfs_root *root,
10008                                 struct extent_buffer *eb, u64 bytenr,
10009                                 int level, u64 owner)
10010 {
10011         struct btrfs_key key;
10012         struct btrfs_root *extent_root = root->fs_info->extent_root;
10013         struct btrfs_path path;
10014         struct btrfs_extent_item *ei;
10015         struct btrfs_extent_inline_ref *iref;
10016         struct extent_buffer *leaf;
10017         unsigned long end;
10018         unsigned long ptr;
10019         int slot;
10020         int skinny_level;
10021         int type;
10022         u32 nodesize = root->nodesize;
10023         u32 item_size;
10024         u64 offset;
10025         int tree_reloc_root = 0;
10026         int found_ref = 0;
10027         int err = 0;
10028         int ret;
10029
10030         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10031             btrfs_header_bytenr(root->node) == bytenr)
10032                 tree_reloc_root = 1;
10033
10034         btrfs_init_path(&path);
10035         key.objectid = bytenr;
10036         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10037                 key.type = BTRFS_METADATA_ITEM_KEY;
10038         else
10039                 key.type = BTRFS_EXTENT_ITEM_KEY;
10040         key.offset = (u64)-1;
10041
10042         /* Search for the backref in extent tree */
10043         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10044         if (ret < 0) {
10045                 err |= BACKREF_MISSING;
10046                 goto out;
10047         }
10048         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10049         if (ret) {
10050                 err |= BACKREF_MISSING;
10051                 goto out;
10052         }
10053
10054         leaf = path.nodes[0];
10055         slot = path.slots[0];
10056         btrfs_item_key_to_cpu(leaf, &key, slot);
10057
10058         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10059
10060         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10061                 skinny_level = (int)key.offset;
10062                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10063         } else {
10064                 struct btrfs_tree_block_info *info;
10065
10066                 info = (struct btrfs_tree_block_info *)(ei + 1);
10067                 skinny_level = btrfs_tree_block_level(leaf, info);
10068                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10069         }
10070
10071         if (eb) {
10072                 u64 header_gen;
10073                 u64 extent_gen;
10074
10075                 if (!(btrfs_extent_flags(leaf, ei) &
10076                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10077                         error(
10078                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10079                                 key.objectid, nodesize,
10080                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10081                         err = BACKREF_MISMATCH;
10082                 }
10083                 header_gen = btrfs_header_generation(eb);
10084                 extent_gen = btrfs_extent_generation(leaf, ei);
10085                 if (header_gen != extent_gen) {
10086                         error(
10087         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10088                                 key.objectid, nodesize, header_gen,
10089                                 extent_gen);
10090                         err = BACKREF_MISMATCH;
10091                 }
10092                 if (level != skinny_level) {
10093                         error(
10094                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10095                                 key.objectid, nodesize, level, skinny_level);
10096                         err = BACKREF_MISMATCH;
10097                 }
10098                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10099                         error(
10100                         "extent[%llu %u] is referred by other roots than %llu",
10101                                 key.objectid, nodesize, root->objectid);
10102                         err = BACKREF_MISMATCH;
10103                 }
10104         }
10105
10106         /*
10107          * Iterate the extent/metadata item to find the exact backref
10108          */
10109         item_size = btrfs_item_size_nr(leaf, slot);
10110         ptr = (unsigned long)iref;
10111         end = (unsigned long)ei + item_size;
10112         while (ptr < end) {
10113                 iref = (struct btrfs_extent_inline_ref *)ptr;
10114                 type = btrfs_extent_inline_ref_type(leaf, iref);
10115                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10116
10117                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10118                         (offset == root->objectid || offset == owner)) {
10119                         found_ref = 1;
10120                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10121                         /*
10122                          * Backref of tree reloc root points to itself, no need
10123                          * to check backref any more.
10124                          */
10125                         if (tree_reloc_root)
10126                                 found_ref = 1;
10127                         else
10128                         /* Check if the backref points to valid referencer */
10129                                 found_ref = !check_tree_block_ref(root, NULL,
10130                                                 offset, level + 1, owner);
10131                 }
10132
10133                 if (found_ref)
10134                         break;
10135                 ptr += btrfs_extent_inline_ref_size(type);
10136         }
10137
10138         /*
10139          * Inlined extent item doesn't have what we need, check
10140          * TREE_BLOCK_REF_KEY
10141          */
10142         if (!found_ref) {
10143                 btrfs_release_path(&path);
10144                 key.objectid = bytenr;
10145                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10146                 key.offset = root->objectid;
10147
10148                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10149                 if (!ret)
10150                         found_ref = 1;
10151         }
10152         if (!found_ref)
10153                 err |= BACKREF_MISSING;
10154 out:
10155         btrfs_release_path(&path);
10156         if (eb && (err & BACKREF_MISSING))
10157                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10158                         bytenr, nodesize, owner, level);
10159         return err;
10160 }
10161
10162 /*
10163  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10164  *
10165  * Return >0 any error found and output error message
10166  * Return 0 for no error found
10167  */
10168 static int check_extent_data_item(struct btrfs_root *root,
10169                                   struct extent_buffer *eb, int slot)
10170 {
10171         struct btrfs_file_extent_item *fi;
10172         struct btrfs_path path;
10173         struct btrfs_root *extent_root = root->fs_info->extent_root;
10174         struct btrfs_key fi_key;
10175         struct btrfs_key dbref_key;
10176         struct extent_buffer *leaf;
10177         struct btrfs_extent_item *ei;
10178         struct btrfs_extent_inline_ref *iref;
10179         struct btrfs_extent_data_ref *dref;
10180         u64 owner;
10181         u64 disk_bytenr;
10182         u64 disk_num_bytes;
10183         u64 extent_num_bytes;
10184         u64 extent_flags;
10185         u32 item_size;
10186         unsigned long end;
10187         unsigned long ptr;
10188         int type;
10189         u64 ref_root;
10190         int found_dbackref = 0;
10191         int err = 0;
10192         int ret;
10193
10194         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10195         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10196
10197         /* Nothing to check for hole and inline data extents */
10198         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10199             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10200                 return 0;
10201
10202         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10203         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10204         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10205
10206         /* Check unaligned disk_num_bytes and num_bytes */
10207         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10208                 error(
10209 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10210                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10211                         root->sectorsize);
10212                 err |= BYTES_UNALIGNED;
10213         } else {
10214                 data_bytes_allocated += disk_num_bytes;
10215         }
10216         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10217                 error(
10218 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10219                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10220                         root->sectorsize);
10221                 err |= BYTES_UNALIGNED;
10222         } else {
10223                 data_bytes_referenced += extent_num_bytes;
10224         }
10225         owner = btrfs_header_owner(eb);
10226
10227         /* Check the extent item of the file extent in extent tree */
10228         btrfs_init_path(&path);
10229         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10230         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10231         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10232
10233         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10234         if (ret)
10235                 goto out;
10236
10237         leaf = path.nodes[0];
10238         slot = path.slots[0];
10239         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10240
10241         extent_flags = btrfs_extent_flags(leaf, ei);
10242
10243         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10244                 error(
10245                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10246                     disk_bytenr, disk_num_bytes,
10247                     BTRFS_EXTENT_FLAG_DATA);
10248                 err |= BACKREF_MISMATCH;
10249         }
10250
10251         /* Check data backref inside that extent item */
10252         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10253         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10254         ptr = (unsigned long)iref;
10255         end = (unsigned long)ei + item_size;
10256         while (ptr < end) {
10257                 iref = (struct btrfs_extent_inline_ref *)ptr;
10258                 type = btrfs_extent_inline_ref_type(leaf, iref);
10259                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10260
10261                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10262                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10263                         if (ref_root == owner || ref_root == root->objectid)
10264                                 found_dbackref = 1;
10265                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10266                         found_dbackref = !check_tree_block_ref(root, NULL,
10267                                 btrfs_extent_inline_ref_offset(leaf, iref),
10268                                 0, owner);
10269                 }
10270
10271                 if (found_dbackref)
10272                         break;
10273                 ptr += btrfs_extent_inline_ref_size(type);
10274         }
10275
10276         if (!found_dbackref) {
10277                 btrfs_release_path(&path);
10278
10279                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10280                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10281                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10282                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10283                                 fi_key.objectid, fi_key.offset);
10284
10285                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10286                                         &dbref_key, &path, 0, 0);
10287                 if (!ret) {
10288                         found_dbackref = 1;
10289                         goto out;
10290                 }
10291
10292                 btrfs_release_path(&path);
10293
10294                 /*
10295                  * Neither inlined nor EXTENT_DATA_REF found, try
10296                  * SHARED_DATA_REF as last chance.
10297                  */
10298                 dbref_key.objectid = disk_bytenr;
10299                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10300                 dbref_key.offset = eb->start;
10301
10302                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10303                                         &dbref_key, &path, 0, 0);
10304                 if (!ret) {
10305                         found_dbackref = 1;
10306                         goto out;
10307                 }
10308         }
10309
10310 out:
10311         if (!found_dbackref)
10312                 err |= BACKREF_MISSING;
10313         btrfs_release_path(&path);
10314         if (err & BACKREF_MISSING) {
10315                 error("data extent[%llu %llu] backref lost",
10316                       disk_bytenr, disk_num_bytes);
10317         }
10318         return err;
10319 }
10320
10321 /*
10322  * Get real tree block level for the case like shared block
10323  * Return >= 0 as tree level
10324  * Return <0 for error
10325  */
10326 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10327 {
10328         struct extent_buffer *eb;
10329         struct btrfs_path path;
10330         struct btrfs_key key;
10331         struct btrfs_extent_item *ei;
10332         u64 flags;
10333         u64 transid;
10334         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10335         u8 backref_level;
10336         u8 header_level;
10337         int ret;
10338
10339         /* Search extent tree for extent generation and level */
10340         key.objectid = bytenr;
10341         key.type = BTRFS_METADATA_ITEM_KEY;
10342         key.offset = (u64)-1;
10343
10344         btrfs_init_path(&path);
10345         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10346         if (ret < 0)
10347                 goto release_out;
10348         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10349         if (ret < 0)
10350                 goto release_out;
10351         if (ret > 0) {
10352                 ret = -ENOENT;
10353                 goto release_out;
10354         }
10355
10356         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10357         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10358                             struct btrfs_extent_item);
10359         flags = btrfs_extent_flags(path.nodes[0], ei);
10360         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10361                 ret = -ENOENT;
10362                 goto release_out;
10363         }
10364
10365         /* Get transid for later read_tree_block() check */
10366         transid = btrfs_extent_generation(path.nodes[0], ei);
10367
10368         /* Get backref level as one source */
10369         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10370                 backref_level = key.offset;
10371         } else {
10372                 struct btrfs_tree_block_info *info;
10373
10374                 info = (struct btrfs_tree_block_info *)(ei + 1);
10375                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10376         }
10377         btrfs_release_path(&path);
10378
10379         /* Get level from tree block as an alternative source */
10380         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10381         if (!extent_buffer_uptodate(eb)) {
10382                 free_extent_buffer(eb);
10383                 return -EIO;
10384         }
10385         header_level = btrfs_header_level(eb);
10386         free_extent_buffer(eb);
10387
10388         if (header_level != backref_level)
10389                 return -EIO;
10390         return header_level;
10391
10392 release_out:
10393         btrfs_release_path(&path);
10394         return ret;
10395 }
10396
10397 /*
10398  * Check if a tree block backref is valid (points to a valid tree block)
10399  * if level == -1, level will be resolved
10400  * Return >0 for any error found and print error message
10401  */
10402 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10403                                     u64 bytenr, int level)
10404 {
10405         struct btrfs_root *root;
10406         struct btrfs_key key;
10407         struct btrfs_path path;
10408         struct extent_buffer *eb;
10409         struct extent_buffer *node;
10410         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10411         int err = 0;
10412         int ret;
10413
10414         /* Query level for level == -1 special case */
10415         if (level == -1)
10416                 level = query_tree_block_level(fs_info, bytenr);
10417         if (level < 0) {
10418                 err |= REFERENCER_MISSING;
10419                 goto out;
10420         }
10421
10422         key.objectid = root_id;
10423         key.type = BTRFS_ROOT_ITEM_KEY;
10424         key.offset = (u64)-1;
10425
10426         root = btrfs_read_fs_root(fs_info, &key);
10427         if (IS_ERR(root)) {
10428                 err |= REFERENCER_MISSING;
10429                 goto out;
10430         }
10431
10432         /* Read out the tree block to get item/node key */
10433         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10434         if (!extent_buffer_uptodate(eb)) {
10435                 err |= REFERENCER_MISSING;
10436                 free_extent_buffer(eb);
10437                 goto out;
10438         }
10439
10440         /* Empty tree, no need to check key */
10441         if (!btrfs_header_nritems(eb) && !level) {
10442                 free_extent_buffer(eb);
10443                 goto out;
10444         }
10445
10446         if (level)
10447                 btrfs_node_key_to_cpu(eb, &key, 0);
10448         else
10449                 btrfs_item_key_to_cpu(eb, &key, 0);
10450
10451         free_extent_buffer(eb);
10452
10453         btrfs_init_path(&path);
10454         path.lowest_level = level;
10455         /* Search with the first key, to ensure we can reach it */
10456         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10457         if (ret < 0) {
10458                 err |= REFERENCER_MISSING;
10459                 goto release_out;
10460         }
10461
10462         node = path.nodes[level];
10463         if (btrfs_header_bytenr(node) != bytenr) {
10464                 error(
10465         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10466                         bytenr, nodesize, bytenr,
10467                         btrfs_header_bytenr(node));
10468                 err |= REFERENCER_MISMATCH;
10469         }
10470         if (btrfs_header_level(node) != level) {
10471                 error(
10472         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10473                         bytenr, nodesize, level,
10474                         btrfs_header_level(node));
10475                 err |= REFERENCER_MISMATCH;
10476         }
10477
10478 release_out:
10479         btrfs_release_path(&path);
10480 out:
10481         if (err & REFERENCER_MISSING) {
10482                 if (level < 0)
10483                         error("extent [%llu %d] lost referencer (owner: %llu)",
10484                                 bytenr, nodesize, root_id);
10485                 else
10486                         error(
10487                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10488                                 bytenr, nodesize, root_id, level);
10489         }
10490
10491         return err;
10492 }
10493
10494 /*
10495  * Check if tree block @eb is tree reloc root.
10496  * Return 0 if it's not or any problem happens
10497  * Return 1 if it's a tree reloc root
10498  */
10499 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10500                                  struct extent_buffer *eb)
10501 {
10502         struct btrfs_root *tree_reloc_root;
10503         struct btrfs_key key;
10504         u64 bytenr = btrfs_header_bytenr(eb);
10505         u64 owner = btrfs_header_owner(eb);
10506         int ret = 0;
10507
10508         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10509         key.offset = owner;
10510         key.type = BTRFS_ROOT_ITEM_KEY;
10511
10512         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10513         if (IS_ERR(tree_reloc_root))
10514                 return 0;
10515
10516         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10517                 ret = 1;
10518         btrfs_free_fs_root(tree_reloc_root);
10519         return ret;
10520 }
10521
10522 /*
10523  * Check referencer for shared block backref
10524  * If level == -1, this function will resolve the level.
10525  */
10526 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10527                                      u64 parent, u64 bytenr, int level)
10528 {
10529         struct extent_buffer *eb;
10530         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10531         u32 nr;
10532         int found_parent = 0;
10533         int i;
10534
10535         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10536         if (!extent_buffer_uptodate(eb))
10537                 goto out;
10538
10539         if (level == -1)
10540                 level = query_tree_block_level(fs_info, bytenr);
10541         if (level < 0)
10542                 goto out;
10543
10544         /* It's possible it's a tree reloc root */
10545         if (parent == bytenr) {
10546                 if (is_tree_reloc_root(fs_info, eb))
10547                         found_parent = 1;
10548                 goto out;
10549         }
10550
10551         if (level + 1 != btrfs_header_level(eb))
10552                 goto out;
10553
10554         nr = btrfs_header_nritems(eb);
10555         for (i = 0; i < nr; i++) {
10556                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10557                         found_parent = 1;
10558                         break;
10559                 }
10560         }
10561 out:
10562         free_extent_buffer(eb);
10563         if (!found_parent) {
10564                 error(
10565         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10566                         bytenr, nodesize, parent, level);
10567                 return REFERENCER_MISSING;
10568         }
10569         return 0;
10570 }
10571
10572 /*
10573  * Check referencer for normal (inlined) data ref
10574  * If len == 0, it will be resolved by searching in extent tree
10575  */
10576 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10577                                      u64 root_id, u64 objectid, u64 offset,
10578                                      u64 bytenr, u64 len, u32 count)
10579 {
10580         struct btrfs_root *root;
10581         struct btrfs_root *extent_root = fs_info->extent_root;
10582         struct btrfs_key key;
10583         struct btrfs_path path;
10584         struct extent_buffer *leaf;
10585         struct btrfs_file_extent_item *fi;
10586         u32 found_count = 0;
10587         int slot;
10588         int ret = 0;
10589
10590         if (!len) {
10591                 key.objectid = bytenr;
10592                 key.type = BTRFS_EXTENT_ITEM_KEY;
10593                 key.offset = (u64)-1;
10594
10595                 btrfs_init_path(&path);
10596                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10597                 if (ret < 0)
10598                         goto out;
10599                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10600                 if (ret)
10601                         goto out;
10602                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10603                 if (key.objectid != bytenr ||
10604                     key.type != BTRFS_EXTENT_ITEM_KEY)
10605                         goto out;
10606                 len = key.offset;
10607                 btrfs_release_path(&path);
10608         }
10609         key.objectid = root_id;
10610         key.type = BTRFS_ROOT_ITEM_KEY;
10611         key.offset = (u64)-1;
10612         btrfs_init_path(&path);
10613
10614         root = btrfs_read_fs_root(fs_info, &key);
10615         if (IS_ERR(root))
10616                 goto out;
10617
10618         key.objectid = objectid;
10619         key.type = BTRFS_EXTENT_DATA_KEY;
10620         /*
10621          * It can be nasty as data backref offset is
10622          * file offset - file extent offset, which is smaller or
10623          * equal to original backref offset.  The only special case is
10624          * overflow.  So we need to special check and do further search.
10625          */
10626         key.offset = offset & (1ULL << 63) ? 0 : offset;
10627
10628         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10629         if (ret < 0)
10630                 goto out;
10631
10632         /*
10633          * Search afterwards to get correct one
10634          * NOTE: As we must do a comprehensive check on the data backref to
10635          * make sure the dref count also matches, we must iterate all file
10636          * extents for that inode.
10637          */
10638         while (1) {
10639                 leaf = path.nodes[0];
10640                 slot = path.slots[0];
10641
10642                 if (slot >= btrfs_header_nritems(leaf))
10643                         goto next;
10644                 btrfs_item_key_to_cpu(leaf, &key, slot);
10645                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10646                         break;
10647                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10648                 /*
10649                  * Except normal disk bytenr and disk num bytes, we still
10650                  * need to do extra check on dbackref offset as
10651                  * dbackref offset = file_offset - file_extent_offset
10652                  */
10653                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10654                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10655                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10656                     offset)
10657                         found_count++;
10658
10659 next:
10660                 ret = btrfs_next_item(root, &path);
10661                 if (ret)
10662                         break;
10663         }
10664 out:
10665         btrfs_release_path(&path);
10666         if (found_count != count) {
10667                 error(
10668 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10669                         bytenr, len, root_id, objectid, offset, count, found_count);
10670                 return REFERENCER_MISSING;
10671         }
10672         return 0;
10673 }
10674
10675 /*
10676  * Check if the referencer of a shared data backref exists
10677  */
10678 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10679                                      u64 parent, u64 bytenr)
10680 {
10681         struct extent_buffer *eb;
10682         struct btrfs_key key;
10683         struct btrfs_file_extent_item *fi;
10684         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10685         u32 nr;
10686         int found_parent = 0;
10687         int i;
10688
10689         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10690         if (!extent_buffer_uptodate(eb))
10691                 goto out;
10692
10693         nr = btrfs_header_nritems(eb);
10694         for (i = 0; i < nr; i++) {
10695                 btrfs_item_key_to_cpu(eb, &key, i);
10696                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10697                         continue;
10698
10699                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10700                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10701                         continue;
10702
10703                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10704                         found_parent = 1;
10705                         break;
10706                 }
10707         }
10708
10709 out:
10710         free_extent_buffer(eb);
10711         if (!found_parent) {
10712                 error("shared extent %llu referencer lost (parent: %llu)",
10713                         bytenr, parent);
10714                 return REFERENCER_MISSING;
10715         }
10716         return 0;
10717 }
10718
10719 /*
10720  * This function will check a given extent item, including its backref and
10721  * itself (like crossing stripe boundary and type)
10722  *
10723  * Since we don't use extent_record anymore, introduce new error bit
10724  */
10725 static int check_extent_item(struct btrfs_fs_info *fs_info,
10726                              struct extent_buffer *eb, int slot)
10727 {
10728         struct btrfs_extent_item *ei;
10729         struct btrfs_extent_inline_ref *iref;
10730         struct btrfs_extent_data_ref *dref;
10731         unsigned long end;
10732         unsigned long ptr;
10733         int type;
10734         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10735         u32 item_size = btrfs_item_size_nr(eb, slot);
10736         u64 flags;
10737         u64 offset;
10738         int metadata = 0;
10739         int level;
10740         struct btrfs_key key;
10741         int ret;
10742         int err = 0;
10743
10744         btrfs_item_key_to_cpu(eb, &key, slot);
10745         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10746                 bytes_used += key.offset;
10747         else
10748                 bytes_used += nodesize;
10749
10750         if (item_size < sizeof(*ei)) {
10751                 /*
10752                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10753                  * old thing when on disk format is still un-determined.
10754                  * No need to care about it anymore
10755                  */
10756                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10757                 return -ENOTTY;
10758         }
10759
10760         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10761         flags = btrfs_extent_flags(eb, ei);
10762
10763         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10764                 metadata = 1;
10765         if (metadata && check_crossing_stripes(global_info, key.objectid,
10766                                                eb->len)) {
10767                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10768                       key.objectid, key.objectid + nodesize);
10769                 err |= CROSSING_STRIPE_BOUNDARY;
10770         }
10771
10772         ptr = (unsigned long)(ei + 1);
10773
10774         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10775                 /* Old EXTENT_ITEM metadata */
10776                 struct btrfs_tree_block_info *info;
10777
10778                 info = (struct btrfs_tree_block_info *)ptr;
10779                 level = btrfs_tree_block_level(eb, info);
10780                 ptr += sizeof(struct btrfs_tree_block_info);
10781         } else {
10782                 /* New METADATA_ITEM */
10783                 level = key.offset;
10784         }
10785         end = (unsigned long)ei + item_size;
10786
10787 next:
10788         /* Reached extent item end normally */
10789         if (ptr == end)
10790                 goto out;
10791
10792         /* Beyond extent item end, wrong item size */
10793         if (ptr > end) {
10794                 err |= ITEM_SIZE_MISMATCH;
10795                 error("extent item at bytenr %llu slot %d has wrong size",
10796                         eb->start, slot);
10797                 goto out;
10798         }
10799
10800         /* Now check every backref in this extent item */
10801         iref = (struct btrfs_extent_inline_ref *)ptr;
10802         type = btrfs_extent_inline_ref_type(eb, iref);
10803         offset = btrfs_extent_inline_ref_offset(eb, iref);
10804         switch (type) {
10805         case BTRFS_TREE_BLOCK_REF_KEY:
10806                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10807                                                level);
10808                 err |= ret;
10809                 break;
10810         case BTRFS_SHARED_BLOCK_REF_KEY:
10811                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10812                                                  level);
10813                 err |= ret;
10814                 break;
10815         case BTRFS_EXTENT_DATA_REF_KEY:
10816                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10817                 ret = check_extent_data_backref(fs_info,
10818                                 btrfs_extent_data_ref_root(eb, dref),
10819                                 btrfs_extent_data_ref_objectid(eb, dref),
10820                                 btrfs_extent_data_ref_offset(eb, dref),
10821                                 key.objectid, key.offset,
10822                                 btrfs_extent_data_ref_count(eb, dref));
10823                 err |= ret;
10824                 break;
10825         case BTRFS_SHARED_DATA_REF_KEY:
10826                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10827                 err |= ret;
10828                 break;
10829         default:
10830                 error("extent[%llu %d %llu] has unknown ref type: %d",
10831                         key.objectid, key.type, key.offset, type);
10832                 err |= UNKNOWN_TYPE;
10833                 goto out;
10834         }
10835
10836         ptr += btrfs_extent_inline_ref_size(type);
10837         goto next;
10838
10839 out:
10840         return err;
10841 }
10842
10843 /*
10844  * Check if a dev extent item is referred correctly by its chunk
10845  */
10846 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10847                                  struct extent_buffer *eb, int slot)
10848 {
10849         struct btrfs_root *chunk_root = fs_info->chunk_root;
10850         struct btrfs_dev_extent *ptr;
10851         struct btrfs_path path;
10852         struct btrfs_key chunk_key;
10853         struct btrfs_key devext_key;
10854         struct btrfs_chunk *chunk;
10855         struct extent_buffer *l;
10856         int num_stripes;
10857         u64 length;
10858         int i;
10859         int found_chunk = 0;
10860         int ret;
10861
10862         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10863         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10864         length = btrfs_dev_extent_length(eb, ptr);
10865
10866         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10867         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10868         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10869
10870         btrfs_init_path(&path);
10871         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10872         if (ret)
10873                 goto out;
10874
10875         l = path.nodes[0];
10876         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10877         if (btrfs_chunk_length(l, chunk) != length)
10878                 goto out;
10879
10880         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10881         for (i = 0; i < num_stripes; i++) {
10882                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10883                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10884
10885                 if (devid == devext_key.objectid &&
10886                     offset == devext_key.offset) {
10887                         found_chunk = 1;
10888                         break;
10889                 }
10890         }
10891 out:
10892         btrfs_release_path(&path);
10893         if (!found_chunk) {
10894                 error(
10895                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10896                         devext_key.objectid, devext_key.offset, length);
10897                 return REFERENCER_MISSING;
10898         }
10899         return 0;
10900 }
10901
10902 /*
10903  * Check if the used space is correct with the dev item
10904  */
10905 static int check_dev_item(struct btrfs_fs_info *fs_info,
10906                           struct extent_buffer *eb, int slot)
10907 {
10908         struct btrfs_root *dev_root = fs_info->dev_root;
10909         struct btrfs_dev_item *dev_item;
10910         struct btrfs_path path;
10911         struct btrfs_key key;
10912         struct btrfs_dev_extent *ptr;
10913         u64 dev_id;
10914         u64 used;
10915         u64 total = 0;
10916         int ret;
10917
10918         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10919         dev_id = btrfs_device_id(eb, dev_item);
10920         used = btrfs_device_bytes_used(eb, dev_item);
10921
10922         key.objectid = dev_id;
10923         key.type = BTRFS_DEV_EXTENT_KEY;
10924         key.offset = 0;
10925
10926         btrfs_init_path(&path);
10927         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10928         if (ret < 0) {
10929                 btrfs_item_key_to_cpu(eb, &key, slot);
10930                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10931                         key.objectid, key.type, key.offset);
10932                 btrfs_release_path(&path);
10933                 return REFERENCER_MISSING;
10934         }
10935
10936         /* Iterate dev_extents to calculate the used space of a device */
10937         while (1) {
10938                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10939                         goto next;
10940
10941                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10942                 if (key.objectid > dev_id)
10943                         break;
10944                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10945                         goto next;
10946
10947                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10948                                      struct btrfs_dev_extent);
10949                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10950 next:
10951                 ret = btrfs_next_item(dev_root, &path);
10952                 if (ret)
10953                         break;
10954         }
10955         btrfs_release_path(&path);
10956
10957         if (used != total) {
10958                 btrfs_item_key_to_cpu(eb, &key, slot);
10959                 error(
10960 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10961                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10962                         BTRFS_DEV_EXTENT_KEY, dev_id);
10963                 return ACCOUNTING_MISMATCH;
10964         }
10965         return 0;
10966 }
10967
10968 /*
10969  * Check a block group item with its referener (chunk) and its used space
10970  * with extent/metadata item
10971  */
10972 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10973                                   struct extent_buffer *eb, int slot)
10974 {
10975         struct btrfs_root *extent_root = fs_info->extent_root;
10976         struct btrfs_root *chunk_root = fs_info->chunk_root;
10977         struct btrfs_block_group_item *bi;
10978         struct btrfs_block_group_item bg_item;
10979         struct btrfs_path path;
10980         struct btrfs_key bg_key;
10981         struct btrfs_key chunk_key;
10982         struct btrfs_key extent_key;
10983         struct btrfs_chunk *chunk;
10984         struct extent_buffer *leaf;
10985         struct btrfs_extent_item *ei;
10986         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10987         u64 flags;
10988         u64 bg_flags;
10989         u64 used;
10990         u64 total = 0;
10991         int ret;
10992         int err = 0;
10993
10994         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10995         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10996         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10997         used = btrfs_block_group_used(&bg_item);
10998         bg_flags = btrfs_block_group_flags(&bg_item);
10999
11000         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11001         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11002         chunk_key.offset = bg_key.objectid;
11003
11004         btrfs_init_path(&path);
11005         /* Search for the referencer chunk */
11006         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11007         if (ret) {
11008                 error(
11009                 "block group[%llu %llu] did not find the related chunk item",
11010                         bg_key.objectid, bg_key.offset);
11011                 err |= REFERENCER_MISSING;
11012         } else {
11013                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11014                                         struct btrfs_chunk);
11015                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11016                                                 bg_key.offset) {
11017                         error(
11018         "block group[%llu %llu] related chunk item length does not match",
11019                                 bg_key.objectid, bg_key.offset);
11020                         err |= REFERENCER_MISMATCH;
11021                 }
11022         }
11023         btrfs_release_path(&path);
11024
11025         /* Search from the block group bytenr */
11026         extent_key.objectid = bg_key.objectid;
11027         extent_key.type = 0;
11028         extent_key.offset = 0;
11029
11030         btrfs_init_path(&path);
11031         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11032         if (ret < 0)
11033                 goto out;
11034
11035         /* Iterate extent tree to account used space */
11036         while (1) {
11037                 leaf = path.nodes[0];
11038
11039                 /* Search slot can point to the last item beyond leaf nritems */
11040                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11041                         goto next;
11042
11043                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11044                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11045                         break;
11046
11047                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11048                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11049                         goto next;
11050                 if (extent_key.objectid < bg_key.objectid)
11051                         goto next;
11052
11053                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11054                         total += nodesize;
11055                 else
11056                         total += extent_key.offset;
11057
11058                 ei = btrfs_item_ptr(leaf, path.slots[0],
11059                                     struct btrfs_extent_item);
11060                 flags = btrfs_extent_flags(leaf, ei);
11061                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11062                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11063                                 error(
11064                         "bad extent[%llu, %llu) type mismatch with chunk",
11065                                         extent_key.objectid,
11066                                         extent_key.objectid + extent_key.offset);
11067                                 err |= CHUNK_TYPE_MISMATCH;
11068                         }
11069                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11070                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11071                                     BTRFS_BLOCK_GROUP_METADATA))) {
11072                                 error(
11073                         "bad extent[%llu, %llu) type mismatch with chunk",
11074                                         extent_key.objectid,
11075                                         extent_key.objectid + nodesize);
11076                                 err |= CHUNK_TYPE_MISMATCH;
11077                         }
11078                 }
11079 next:
11080                 ret = btrfs_next_item(extent_root, &path);
11081                 if (ret)
11082                         break;
11083         }
11084
11085 out:
11086         btrfs_release_path(&path);
11087
11088         if (total != used) {
11089                 error(
11090                 "block group[%llu %llu] used %llu but extent items used %llu",
11091                         bg_key.objectid, bg_key.offset, used, total);
11092                 err |= ACCOUNTING_MISMATCH;
11093         }
11094         return err;
11095 }
11096
11097 /*
11098  * Check a chunk item.
11099  * Including checking all referred dev_extents and block group
11100  */
11101 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11102                             struct extent_buffer *eb, int slot)
11103 {
11104         struct btrfs_root *extent_root = fs_info->extent_root;
11105         struct btrfs_root *dev_root = fs_info->dev_root;
11106         struct btrfs_path path;
11107         struct btrfs_key chunk_key;
11108         struct btrfs_key bg_key;
11109         struct btrfs_key devext_key;
11110         struct btrfs_chunk *chunk;
11111         struct extent_buffer *leaf;
11112         struct btrfs_block_group_item *bi;
11113         struct btrfs_block_group_item bg_item;
11114         struct btrfs_dev_extent *ptr;
11115         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11116         u64 length;
11117         u64 chunk_end;
11118         u64 type;
11119         u64 profile;
11120         int num_stripes;
11121         u64 offset;
11122         u64 objectid;
11123         int i;
11124         int ret;
11125         int err = 0;
11126
11127         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11128         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11129         length = btrfs_chunk_length(eb, chunk);
11130         chunk_end = chunk_key.offset + length;
11131         if (!IS_ALIGNED(length, sectorsize)) {
11132                 error("chunk[%llu %llu) not aligned to %u",
11133                         chunk_key.offset, chunk_end, sectorsize);
11134                 err |= BYTES_UNALIGNED;
11135                 goto out;
11136         }
11137
11138         type = btrfs_chunk_type(eb, chunk);
11139         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11140         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11141                 error("chunk[%llu %llu) has no chunk type",
11142                         chunk_key.offset, chunk_end);
11143                 err |= UNKNOWN_TYPE;
11144         }
11145         if (profile && (profile & (profile - 1))) {
11146                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11147                         chunk_key.offset, chunk_end, profile);
11148                 err |= UNKNOWN_TYPE;
11149         }
11150
11151         bg_key.objectid = chunk_key.offset;
11152         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11153         bg_key.offset = length;
11154
11155         btrfs_init_path(&path);
11156         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11157         if (ret) {
11158                 error(
11159                 "chunk[%llu %llu) did not find the related block group item",
11160                         chunk_key.offset, chunk_end);
11161                 err |= REFERENCER_MISSING;
11162         } else{
11163                 leaf = path.nodes[0];
11164                 bi = btrfs_item_ptr(leaf, path.slots[0],
11165                                     struct btrfs_block_group_item);
11166                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11167                                    sizeof(bg_item));
11168                 if (btrfs_block_group_flags(&bg_item) != type) {
11169                         error(
11170 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11171                                 chunk_key.offset, chunk_end, type,
11172                                 btrfs_block_group_flags(&bg_item));
11173                         err |= REFERENCER_MISSING;
11174                 }
11175         }
11176
11177         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11178         for (i = 0; i < num_stripes; i++) {
11179                 btrfs_release_path(&path);
11180                 btrfs_init_path(&path);
11181                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11182                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11183                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11184
11185                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11186                                         0, 0);
11187                 if (ret)
11188                         goto not_match_dev;
11189
11190                 leaf = path.nodes[0];
11191                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11192                                      struct btrfs_dev_extent);
11193                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11194                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11195                 if (objectid != chunk_key.objectid ||
11196                     offset != chunk_key.offset ||
11197                     btrfs_dev_extent_length(leaf, ptr) != length)
11198                         goto not_match_dev;
11199                 continue;
11200 not_match_dev:
11201                 err |= BACKREF_MISSING;
11202                 error(
11203                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11204                         chunk_key.objectid, chunk_end, i);
11205                 continue;
11206         }
11207         btrfs_release_path(&path);
11208 out:
11209         return err;
11210 }
11211
11212 /*
11213  * Main entry function to check known items and update related accounting info
11214  */
11215 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11216 {
11217         struct btrfs_fs_info *fs_info = root->fs_info;
11218         struct btrfs_key key;
11219         int slot = 0;
11220         int type;
11221         struct btrfs_extent_data_ref *dref;
11222         int ret;
11223         int err = 0;
11224
11225 next:
11226         btrfs_item_key_to_cpu(eb, &key, slot);
11227         type = key.type;
11228
11229         switch (type) {
11230         case BTRFS_EXTENT_DATA_KEY:
11231                 ret = check_extent_data_item(root, eb, slot);
11232                 err |= ret;
11233                 break;
11234         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11235                 ret = check_block_group_item(fs_info, eb, slot);
11236                 err |= ret;
11237                 break;
11238         case BTRFS_DEV_ITEM_KEY:
11239                 ret = check_dev_item(fs_info, eb, slot);
11240                 err |= ret;
11241                 break;
11242         case BTRFS_CHUNK_ITEM_KEY:
11243                 ret = check_chunk_item(fs_info, eb, slot);
11244                 err |= ret;
11245                 break;
11246         case BTRFS_DEV_EXTENT_KEY:
11247                 ret = check_dev_extent_item(fs_info, eb, slot);
11248                 err |= ret;
11249                 break;
11250         case BTRFS_EXTENT_ITEM_KEY:
11251         case BTRFS_METADATA_ITEM_KEY:
11252                 ret = check_extent_item(fs_info, eb, slot);
11253                 err |= ret;
11254                 break;
11255         case BTRFS_EXTENT_CSUM_KEY:
11256                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11257                 break;
11258         case BTRFS_TREE_BLOCK_REF_KEY:
11259                 ret = check_tree_block_backref(fs_info, key.offset,
11260                                                key.objectid, -1);
11261                 err |= ret;
11262                 break;
11263         case BTRFS_EXTENT_DATA_REF_KEY:
11264                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11265                 ret = check_extent_data_backref(fs_info,
11266                                 btrfs_extent_data_ref_root(eb, dref),
11267                                 btrfs_extent_data_ref_objectid(eb, dref),
11268                                 btrfs_extent_data_ref_offset(eb, dref),
11269                                 key.objectid, 0,
11270                                 btrfs_extent_data_ref_count(eb, dref));
11271                 err |= ret;
11272                 break;
11273         case BTRFS_SHARED_BLOCK_REF_KEY:
11274                 ret = check_shared_block_backref(fs_info, key.offset,
11275                                                  key.objectid, -1);
11276                 err |= ret;
11277                 break;
11278         case BTRFS_SHARED_DATA_REF_KEY:
11279                 ret = check_shared_data_backref(fs_info, key.offset,
11280                                                 key.objectid);
11281                 err |= ret;
11282                 break;
11283         default:
11284                 break;
11285         }
11286
11287         if (++slot < btrfs_header_nritems(eb))
11288                 goto next;
11289
11290         return err;
11291 }
11292
11293 /*
11294  * Helper function for later fs/subvol tree check.  To determine if a tree
11295  * block should be checked.
11296  * This function will ensure only the direct referencer with lowest rootid to
11297  * check a fs/subvolume tree block.
11298  *
11299  * Backref check at extent tree would detect errors like missing subvolume
11300  * tree, so we can do aggressive check to reduce duplicated checks.
11301  */
11302 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11303 {
11304         struct btrfs_root *extent_root = root->fs_info->extent_root;
11305         struct btrfs_key key;
11306         struct btrfs_path path;
11307         struct extent_buffer *leaf;
11308         int slot;
11309         struct btrfs_extent_item *ei;
11310         unsigned long ptr;
11311         unsigned long end;
11312         int type;
11313         u32 item_size;
11314         u64 offset;
11315         struct btrfs_extent_inline_ref *iref;
11316         int ret;
11317
11318         btrfs_init_path(&path);
11319         key.objectid = btrfs_header_bytenr(eb);
11320         key.type = BTRFS_METADATA_ITEM_KEY;
11321         key.offset = (u64)-1;
11322
11323         /*
11324          * Any failure in backref resolving means we can't determine
11325          * whom the tree block belongs to.
11326          * So in that case, we need to check that tree block
11327          */
11328         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11329         if (ret < 0)
11330                 goto need_check;
11331
11332         ret = btrfs_previous_extent_item(extent_root, &path,
11333                                          btrfs_header_bytenr(eb));
11334         if (ret)
11335                 goto need_check;
11336
11337         leaf = path.nodes[0];
11338         slot = path.slots[0];
11339         btrfs_item_key_to_cpu(leaf, &key, slot);
11340         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11341
11342         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11343                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11344         } else {
11345                 struct btrfs_tree_block_info *info;
11346
11347                 info = (struct btrfs_tree_block_info *)(ei + 1);
11348                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11349         }
11350
11351         item_size = btrfs_item_size_nr(leaf, slot);
11352         ptr = (unsigned long)iref;
11353         end = (unsigned long)ei + item_size;
11354         while (ptr < end) {
11355                 iref = (struct btrfs_extent_inline_ref *)ptr;
11356                 type = btrfs_extent_inline_ref_type(leaf, iref);
11357                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11358
11359                 /*
11360                  * We only check the tree block if current root is
11361                  * the lowest referencer of it.
11362                  */
11363                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11364                     offset < root->objectid) {
11365                         btrfs_release_path(&path);
11366                         return 0;
11367                 }
11368
11369                 ptr += btrfs_extent_inline_ref_size(type);
11370         }
11371         /*
11372          * Normally we should also check keyed tree block ref, but that may be
11373          * very time consuming.  Inlined ref should already make us skip a lot
11374          * of refs now.  So skip search keyed tree block ref.
11375          */
11376
11377 need_check:
11378         btrfs_release_path(&path);
11379         return 1;
11380 }
11381
11382 /*
11383  * Traversal function for tree block. We will do:
11384  * 1) Skip shared fs/subvolume tree blocks
11385  * 2) Update related bytes accounting
11386  * 3) Pre-order traversal
11387  */
11388 static int traverse_tree_block(struct btrfs_root *root,
11389                                 struct extent_buffer *node)
11390 {
11391         struct extent_buffer *eb;
11392         struct btrfs_key key;
11393         struct btrfs_key drop_key;
11394         int level;
11395         u64 nr;
11396         int i;
11397         int err = 0;
11398         int ret;
11399
11400         /*
11401          * Skip shared fs/subvolume tree block, in that case they will
11402          * be checked by referencer with lowest rootid
11403          */
11404         if (is_fstree(root->objectid) && !should_check(root, node))
11405                 return 0;
11406
11407         /* Update bytes accounting */
11408         total_btree_bytes += node->len;
11409         if (fs_root_objectid(btrfs_header_owner(node)))
11410                 total_fs_tree_bytes += node->len;
11411         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11412                 total_extent_tree_bytes += node->len;
11413         if (!found_old_backref &&
11414             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11415             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11416             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11417                 found_old_backref = 1;
11418
11419         /* pre-order tranversal, check itself first */
11420         level = btrfs_header_level(node);
11421         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11422                                    btrfs_header_level(node),
11423                                    btrfs_header_owner(node));
11424         err |= ret;
11425         if (err)
11426                 error(
11427         "check %s failed root %llu bytenr %llu level %d, force continue check",
11428                         level ? "node":"leaf", root->objectid,
11429                         btrfs_header_bytenr(node), btrfs_header_level(node));
11430
11431         if (!level) {
11432                 btree_space_waste += btrfs_leaf_free_space(root, node);
11433                 ret = check_leaf_items(root, node);
11434                 err |= ret;
11435                 return err;
11436         }
11437
11438         nr = btrfs_header_nritems(node);
11439         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11440         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11441                 sizeof(struct btrfs_key_ptr);
11442
11443         /* Then check all its children */
11444         for (i = 0; i < nr; i++) {
11445                 u64 blocknr = btrfs_node_blockptr(node, i);
11446
11447                 btrfs_node_key_to_cpu(node, &key, i);
11448                 if (level == root->root_item.drop_level &&
11449                     is_dropped_key(&key, &drop_key))
11450                         continue;
11451
11452                 /*
11453                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11454                  * to call the function itself.
11455                  */
11456                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11457                 if (extent_buffer_uptodate(eb)) {
11458                         ret = traverse_tree_block(root, eb);
11459                         err |= ret;
11460                 }
11461                 free_extent_buffer(eb);
11462         }
11463
11464         return err;
11465 }
11466
11467 /*
11468  * Low memory usage version check_chunks_and_extents.
11469  */
11470 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11471 {
11472         struct btrfs_path path;
11473         struct btrfs_key key;
11474         struct btrfs_root *root1;
11475         struct btrfs_root *cur_root;
11476         int err = 0;
11477         int ret;
11478
11479         root1 = root->fs_info->chunk_root;
11480         ret = traverse_tree_block(root1, root1->node);
11481         err |= ret;
11482
11483         root1 = root->fs_info->tree_root;
11484         ret = traverse_tree_block(root1, root1->node);
11485         err |= ret;
11486
11487         btrfs_init_path(&path);
11488         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11489         key.offset = 0;
11490         key.type = BTRFS_ROOT_ITEM_KEY;
11491
11492         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11493         if (ret) {
11494                 error("cannot find extent treet in tree_root");
11495                 goto out;
11496         }
11497
11498         while (1) {
11499                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11500                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11501                         goto next;
11502                 key.offset = (u64)-1;
11503
11504                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11505                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11506                                         &key);
11507                 else
11508                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11509                 if (IS_ERR(cur_root) || !cur_root) {
11510                         error("failed to read tree: %lld", key.objectid);
11511                         goto next;
11512                 }
11513
11514                 ret = traverse_tree_block(cur_root, cur_root->node);
11515                 err |= ret;
11516
11517                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11518                         btrfs_free_fs_root(cur_root);
11519 next:
11520                 ret = btrfs_next_item(root1, &path);
11521                 if (ret)
11522                         goto out;
11523         }
11524
11525 out:
11526         btrfs_release_path(&path);
11527         return err;
11528 }
11529
11530 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11531                            struct btrfs_root *root, int overwrite)
11532 {
11533         struct extent_buffer *c;
11534         struct extent_buffer *old = root->node;
11535         int level;
11536         int ret;
11537         struct btrfs_disk_key disk_key = {0,0,0};
11538
11539         level = 0;
11540
11541         if (overwrite) {
11542                 c = old;
11543                 extent_buffer_get(c);
11544                 goto init;
11545         }
11546         c = btrfs_alloc_free_block(trans, root,
11547                                    root->nodesize,
11548                                    root->root_key.objectid,
11549                                    &disk_key, level, 0, 0);
11550         if (IS_ERR(c)) {
11551                 c = old;
11552                 extent_buffer_get(c);
11553                 overwrite = 1;
11554         }
11555 init:
11556         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11557         btrfs_set_header_level(c, level);
11558         btrfs_set_header_bytenr(c, c->start);
11559         btrfs_set_header_generation(c, trans->transid);
11560         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11561         btrfs_set_header_owner(c, root->root_key.objectid);
11562
11563         write_extent_buffer(c, root->fs_info->fsid,
11564                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11565
11566         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11567                             btrfs_header_chunk_tree_uuid(c),
11568                             BTRFS_UUID_SIZE);
11569
11570         btrfs_mark_buffer_dirty(c);
11571         /*
11572          * this case can happen in the following case:
11573          *
11574          * 1.overwrite previous root.
11575          *
11576          * 2.reinit reloc data root, this is because we skip pin
11577          * down reloc data tree before which means we can allocate
11578          * same block bytenr here.
11579          */
11580         if (old->start == c->start) {
11581                 btrfs_set_root_generation(&root->root_item,
11582                                           trans->transid);
11583                 root->root_item.level = btrfs_header_level(root->node);
11584                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11585                                         &root->root_key, &root->root_item);
11586                 if (ret) {
11587                         free_extent_buffer(c);
11588                         return ret;
11589                 }
11590         }
11591         free_extent_buffer(old);
11592         root->node = c;
11593         add_root_to_dirty_list(root);
11594         return 0;
11595 }
11596
11597 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11598                                 struct extent_buffer *eb, int tree_root)
11599 {
11600         struct extent_buffer *tmp;
11601         struct btrfs_root_item *ri;
11602         struct btrfs_key key;
11603         u64 bytenr;
11604         u32 nodesize;
11605         int level = btrfs_header_level(eb);
11606         int nritems;
11607         int ret;
11608         int i;
11609
11610         /*
11611          * If we have pinned this block before, don't pin it again.
11612          * This can not only avoid forever loop with broken filesystem
11613          * but also give us some speedups.
11614          */
11615         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11616                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11617                 return 0;
11618
11619         btrfs_pin_extent(fs_info, eb->start, eb->len);
11620
11621         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11622         nritems = btrfs_header_nritems(eb);
11623         for (i = 0; i < nritems; i++) {
11624                 if (level == 0) {
11625                         btrfs_item_key_to_cpu(eb, &key, i);
11626                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11627                                 continue;
11628                         /* Skip the extent root and reloc roots */
11629                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11630                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11631                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11632                                 continue;
11633                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11634                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11635
11636                         /*
11637                          * If at any point we start needing the real root we
11638                          * will have to build a stump root for the root we are
11639                          * in, but for now this doesn't actually use the root so
11640                          * just pass in extent_root.
11641                          */
11642                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11643                                               nodesize, 0);
11644                         if (!extent_buffer_uptodate(tmp)) {
11645                                 fprintf(stderr, "Error reading root block\n");
11646                                 return -EIO;
11647                         }
11648                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11649                         free_extent_buffer(tmp);
11650                         if (ret)
11651                                 return ret;
11652                 } else {
11653                         bytenr = btrfs_node_blockptr(eb, i);
11654
11655                         /* If we aren't the tree root don't read the block */
11656                         if (level == 1 && !tree_root) {
11657                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11658                                 continue;
11659                         }
11660
11661                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11662                                               nodesize, 0);
11663                         if (!extent_buffer_uptodate(tmp)) {
11664                                 fprintf(stderr, "Error reading tree block\n");
11665                                 return -EIO;
11666                         }
11667                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11668                         free_extent_buffer(tmp);
11669                         if (ret)
11670                                 return ret;
11671                 }
11672         }
11673
11674         return 0;
11675 }
11676
11677 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11678 {
11679         int ret;
11680
11681         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11682         if (ret)
11683                 return ret;
11684
11685         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11686 }
11687
11688 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11689 {
11690         struct btrfs_block_group_cache *cache;
11691         struct btrfs_path path;
11692         struct extent_buffer *leaf;
11693         struct btrfs_chunk *chunk;
11694         struct btrfs_key key;
11695         int ret;
11696         u64 start;
11697
11698         btrfs_init_path(&path);
11699         key.objectid = 0;
11700         key.type = BTRFS_CHUNK_ITEM_KEY;
11701         key.offset = 0;
11702         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11703         if (ret < 0) {
11704                 btrfs_release_path(&path);
11705                 return ret;
11706         }
11707
11708         /*
11709          * We do this in case the block groups were screwed up and had alloc
11710          * bits that aren't actually set on the chunks.  This happens with
11711          * restored images every time and could happen in real life I guess.
11712          */
11713         fs_info->avail_data_alloc_bits = 0;
11714         fs_info->avail_metadata_alloc_bits = 0;
11715         fs_info->avail_system_alloc_bits = 0;
11716
11717         /* First we need to create the in-memory block groups */
11718         while (1) {
11719                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11720                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11721                         if (ret < 0) {
11722                                 btrfs_release_path(&path);
11723                                 return ret;
11724                         }
11725                         if (ret) {
11726                                 ret = 0;
11727                                 break;
11728                         }
11729                 }
11730                 leaf = path.nodes[0];
11731                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11732                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11733                         path.slots[0]++;
11734                         continue;
11735                 }
11736
11737                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11738                 btrfs_add_block_group(fs_info, 0,
11739                                       btrfs_chunk_type(leaf, chunk),
11740                                       key.objectid, key.offset,
11741                                       btrfs_chunk_length(leaf, chunk));
11742                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11743                                  key.offset + btrfs_chunk_length(leaf, chunk));
11744                 path.slots[0]++;
11745         }
11746         start = 0;
11747         while (1) {
11748                 cache = btrfs_lookup_first_block_group(fs_info, start);
11749                 if (!cache)
11750                         break;
11751                 cache->cached = 1;
11752                 start = cache->key.objectid + cache->key.offset;
11753         }
11754
11755         btrfs_release_path(&path);
11756         return 0;
11757 }
11758
11759 static int reset_balance(struct btrfs_trans_handle *trans,
11760                          struct btrfs_fs_info *fs_info)
11761 {
11762         struct btrfs_root *root = fs_info->tree_root;
11763         struct btrfs_path path;
11764         struct extent_buffer *leaf;
11765         struct btrfs_key key;
11766         int del_slot, del_nr = 0;
11767         int ret;
11768         int found = 0;
11769
11770         btrfs_init_path(&path);
11771         key.objectid = BTRFS_BALANCE_OBJECTID;
11772         key.type = BTRFS_BALANCE_ITEM_KEY;
11773         key.offset = 0;
11774         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11775         if (ret) {
11776                 if (ret > 0)
11777                         ret = 0;
11778                 if (!ret)
11779                         goto reinit_data_reloc;
11780                 else
11781                         goto out;
11782         }
11783
11784         ret = btrfs_del_item(trans, root, &path);
11785         if (ret)
11786                 goto out;
11787         btrfs_release_path(&path);
11788
11789         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11790         key.type = BTRFS_ROOT_ITEM_KEY;
11791         key.offset = 0;
11792         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11793         if (ret < 0)
11794                 goto out;
11795         while (1) {
11796                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11797                         if (!found)
11798                                 break;
11799
11800                         if (del_nr) {
11801                                 ret = btrfs_del_items(trans, root, &path,
11802                                                       del_slot, del_nr);
11803                                 del_nr = 0;
11804                                 if (ret)
11805                                         goto out;
11806                         }
11807                         key.offset++;
11808                         btrfs_release_path(&path);
11809
11810                         found = 0;
11811                         ret = btrfs_search_slot(trans, root, &key, &path,
11812                                                 -1, 1);
11813                         if (ret < 0)
11814                                 goto out;
11815                         continue;
11816                 }
11817                 found = 1;
11818                 leaf = path.nodes[0];
11819                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11820                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11821                         break;
11822                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11823                         path.slots[0]++;
11824                         continue;
11825                 }
11826                 if (!del_nr) {
11827                         del_slot = path.slots[0];
11828                         del_nr = 1;
11829                 } else {
11830                         del_nr++;
11831                 }
11832                 path.slots[0]++;
11833         }
11834
11835         if (del_nr) {
11836                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11837                 if (ret)
11838                         goto out;
11839         }
11840         btrfs_release_path(&path);
11841
11842 reinit_data_reloc:
11843         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11844         key.type = BTRFS_ROOT_ITEM_KEY;
11845         key.offset = (u64)-1;
11846         root = btrfs_read_fs_root(fs_info, &key);
11847         if (IS_ERR(root)) {
11848                 fprintf(stderr, "Error reading data reloc tree\n");
11849                 ret = PTR_ERR(root);
11850                 goto out;
11851         }
11852         record_root_in_trans(trans, root);
11853         ret = btrfs_fsck_reinit_root(trans, root, 0);
11854         if (ret)
11855                 goto out;
11856         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11857 out:
11858         btrfs_release_path(&path);
11859         return ret;
11860 }
11861
11862 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11863                               struct btrfs_fs_info *fs_info)
11864 {
11865         u64 start = 0;
11866         int ret;
11867
11868         /*
11869          * The only reason we don't do this is because right now we're just
11870          * walking the trees we find and pinning down their bytes, we don't look
11871          * at any of the leaves.  In order to do mixed groups we'd have to check
11872          * the leaves of any fs roots and pin down the bytes for any file
11873          * extents we find.  Not hard but why do it if we don't have to?
11874          */
11875         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11876                 fprintf(stderr, "We don't support re-initing the extent tree "
11877                         "for mixed block groups yet, please notify a btrfs "
11878                         "developer you want to do this so they can add this "
11879                         "functionality.\n");
11880                 return -EINVAL;
11881         }
11882
11883         /*
11884          * first we need to walk all of the trees except the extent tree and pin
11885          * down the bytes that are in use so we don't overwrite any existing
11886          * metadata.
11887          */
11888         ret = pin_metadata_blocks(fs_info);
11889         if (ret) {
11890                 fprintf(stderr, "error pinning down used bytes\n");
11891                 return ret;
11892         }
11893
11894         /*
11895          * Need to drop all the block groups since we're going to recreate all
11896          * of them again.
11897          */
11898         btrfs_free_block_groups(fs_info);
11899         ret = reset_block_groups(fs_info);
11900         if (ret) {
11901                 fprintf(stderr, "error resetting the block groups\n");
11902                 return ret;
11903         }
11904
11905         /* Ok we can allocate now, reinit the extent root */
11906         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11907         if (ret) {
11908                 fprintf(stderr, "extent root initialization failed\n");
11909                 /*
11910                  * When the transaction code is updated we should end the
11911                  * transaction, but for now progs only knows about commit so
11912                  * just return an error.
11913                  */
11914                 return ret;
11915         }
11916
11917         /*
11918          * Now we have all the in-memory block groups setup so we can make
11919          * allocations properly, and the metadata we care about is safe since we
11920          * pinned all of it above.
11921          */
11922         while (1) {
11923                 struct btrfs_block_group_cache *cache;
11924
11925                 cache = btrfs_lookup_first_block_group(fs_info, start);
11926                 if (!cache)
11927                         break;
11928                 start = cache->key.objectid + cache->key.offset;
11929                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11930                                         &cache->key, &cache->item,
11931                                         sizeof(cache->item));
11932                 if (ret) {
11933                         fprintf(stderr, "Error adding block group\n");
11934                         return ret;
11935                 }
11936                 btrfs_extent_post_op(trans, fs_info->extent_root);
11937         }
11938
11939         ret = reset_balance(trans, fs_info);
11940         if (ret)
11941                 fprintf(stderr, "error resetting the pending balance\n");
11942
11943         return ret;
11944 }
11945
11946 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11947 {
11948         struct btrfs_path path;
11949         struct btrfs_trans_handle *trans;
11950         struct btrfs_key key;
11951         int ret;
11952
11953         printf("Recowing metadata block %llu\n", eb->start);
11954         key.objectid = btrfs_header_owner(eb);
11955         key.type = BTRFS_ROOT_ITEM_KEY;
11956         key.offset = (u64)-1;
11957
11958         root = btrfs_read_fs_root(root->fs_info, &key);
11959         if (IS_ERR(root)) {
11960                 fprintf(stderr, "Couldn't find owner root %llu\n",
11961                         key.objectid);
11962                 return PTR_ERR(root);
11963         }
11964
11965         trans = btrfs_start_transaction(root, 1);
11966         if (IS_ERR(trans))
11967                 return PTR_ERR(trans);
11968
11969         btrfs_init_path(&path);
11970         path.lowest_level = btrfs_header_level(eb);
11971         if (path.lowest_level)
11972                 btrfs_node_key_to_cpu(eb, &key, 0);
11973         else
11974                 btrfs_item_key_to_cpu(eb, &key, 0);
11975
11976         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11977         btrfs_commit_transaction(trans, root);
11978         btrfs_release_path(&path);
11979         return ret;
11980 }
11981
11982 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11983 {
11984         struct btrfs_path path;
11985         struct btrfs_trans_handle *trans;
11986         struct btrfs_key key;
11987         int ret;
11988
11989         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11990                bad->key.type, bad->key.offset);
11991         key.objectid = bad->root_id;
11992         key.type = BTRFS_ROOT_ITEM_KEY;
11993         key.offset = (u64)-1;
11994
11995         root = btrfs_read_fs_root(root->fs_info, &key);
11996         if (IS_ERR(root)) {
11997                 fprintf(stderr, "Couldn't find owner root %llu\n",
11998                         key.objectid);
11999                 return PTR_ERR(root);
12000         }
12001
12002         trans = btrfs_start_transaction(root, 1);
12003         if (IS_ERR(trans))
12004                 return PTR_ERR(trans);
12005
12006         btrfs_init_path(&path);
12007         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12008         if (ret) {
12009                 if (ret > 0)
12010                         ret = 0;
12011                 goto out;
12012         }
12013         ret = btrfs_del_item(trans, root, &path);
12014 out:
12015         btrfs_commit_transaction(trans, root);
12016         btrfs_release_path(&path);
12017         return ret;
12018 }
12019
12020 static int zero_log_tree(struct btrfs_root *root)
12021 {
12022         struct btrfs_trans_handle *trans;
12023         int ret;
12024
12025         trans = btrfs_start_transaction(root, 1);
12026         if (IS_ERR(trans)) {
12027                 ret = PTR_ERR(trans);
12028                 return ret;
12029         }
12030         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12031         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12032         ret = btrfs_commit_transaction(trans, root);
12033         return ret;
12034 }
12035
12036 static int populate_csum(struct btrfs_trans_handle *trans,
12037                          struct btrfs_root *csum_root, char *buf, u64 start,
12038                          u64 len)
12039 {
12040         u64 offset = 0;
12041         u64 sectorsize;
12042         int ret = 0;
12043
12044         while (offset < len) {
12045                 sectorsize = csum_root->sectorsize;
12046                 ret = read_extent_data(csum_root, buf, start + offset,
12047                                        &sectorsize, 0);
12048                 if (ret)
12049                         break;
12050                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12051                                             start + offset, buf, sectorsize);
12052                 if (ret)
12053                         break;
12054                 offset += sectorsize;
12055         }
12056         return ret;
12057 }
12058
12059 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12060                                       struct btrfs_root *csum_root,
12061                                       struct btrfs_root *cur_root)
12062 {
12063         struct btrfs_path path;
12064         struct btrfs_key key;
12065         struct extent_buffer *node;
12066         struct btrfs_file_extent_item *fi;
12067         char *buf = NULL;
12068         u64 start = 0;
12069         u64 len = 0;
12070         int slot = 0;
12071         int ret = 0;
12072
12073         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12074         if (!buf)
12075                 return -ENOMEM;
12076
12077         btrfs_init_path(&path);
12078         key.objectid = 0;
12079         key.offset = 0;
12080         key.type = 0;
12081         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12082         if (ret < 0)
12083                 goto out;
12084         /* Iterate all regular file extents and fill its csum */
12085         while (1) {
12086                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12087
12088                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12089                         goto next;
12090                 node = path.nodes[0];
12091                 slot = path.slots[0];
12092                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12093                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12094                         goto next;
12095                 start = btrfs_file_extent_disk_bytenr(node, fi);
12096                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12097
12098                 ret = populate_csum(trans, csum_root, buf, start, len);
12099                 if (ret == -EEXIST)
12100                         ret = 0;
12101                 if (ret < 0)
12102                         goto out;
12103 next:
12104                 /*
12105                  * TODO: if next leaf is corrupted, jump to nearest next valid
12106                  * leaf.
12107                  */
12108                 ret = btrfs_next_item(cur_root, &path);
12109                 if (ret < 0)
12110                         goto out;
12111                 if (ret > 0) {
12112                         ret = 0;
12113                         goto out;
12114                 }
12115         }
12116
12117 out:
12118         btrfs_release_path(&path);
12119         free(buf);
12120         return ret;
12121 }
12122
12123 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12124                                   struct btrfs_root *csum_root)
12125 {
12126         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12127         struct btrfs_path path;
12128         struct btrfs_root *tree_root = fs_info->tree_root;
12129         struct btrfs_root *cur_root;
12130         struct extent_buffer *node;
12131         struct btrfs_key key;
12132         int slot = 0;
12133         int ret = 0;
12134
12135         btrfs_init_path(&path);
12136         key.objectid = BTRFS_FS_TREE_OBJECTID;
12137         key.offset = 0;
12138         key.type = BTRFS_ROOT_ITEM_KEY;
12139         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12140         if (ret < 0)
12141                 goto out;
12142         if (ret > 0) {
12143                 ret = -ENOENT;
12144                 goto out;
12145         }
12146
12147         while (1) {
12148                 node = path.nodes[0];
12149                 slot = path.slots[0];
12150                 btrfs_item_key_to_cpu(node, &key, slot);
12151                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12152                         goto out;
12153                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12154                         goto next;
12155                 if (!is_fstree(key.objectid))
12156                         goto next;
12157                 key.offset = (u64)-1;
12158
12159                 cur_root = btrfs_read_fs_root(fs_info, &key);
12160                 if (IS_ERR(cur_root) || !cur_root) {
12161                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12162                                 key.objectid);
12163                         goto out;
12164                 }
12165                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12166                                 cur_root);
12167                 if (ret < 0)
12168                         goto out;
12169 next:
12170                 ret = btrfs_next_item(tree_root, &path);
12171                 if (ret > 0) {
12172                         ret = 0;
12173                         goto out;
12174                 }
12175                 if (ret < 0)
12176                         goto out;
12177         }
12178
12179 out:
12180         btrfs_release_path(&path);
12181         return ret;
12182 }
12183
12184 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12185                                       struct btrfs_root *csum_root)
12186 {
12187         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12188         struct btrfs_path path;
12189         struct btrfs_extent_item *ei;
12190         struct extent_buffer *leaf;
12191         char *buf;
12192         struct btrfs_key key;
12193         int ret;
12194
12195         btrfs_init_path(&path);
12196         key.objectid = 0;
12197         key.type = BTRFS_EXTENT_ITEM_KEY;
12198         key.offset = 0;
12199         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12200         if (ret < 0) {
12201                 btrfs_release_path(&path);
12202                 return ret;
12203         }
12204
12205         buf = malloc(csum_root->sectorsize);
12206         if (!buf) {
12207                 btrfs_release_path(&path);
12208                 return -ENOMEM;
12209         }
12210
12211         while (1) {
12212                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12213                         ret = btrfs_next_leaf(extent_root, &path);
12214                         if (ret < 0)
12215                                 break;
12216                         if (ret) {
12217                                 ret = 0;
12218                                 break;
12219                         }
12220                 }
12221                 leaf = path.nodes[0];
12222
12223                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12224                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12225                         path.slots[0]++;
12226                         continue;
12227                 }
12228
12229                 ei = btrfs_item_ptr(leaf, path.slots[0],
12230                                     struct btrfs_extent_item);
12231                 if (!(btrfs_extent_flags(leaf, ei) &
12232                       BTRFS_EXTENT_FLAG_DATA)) {
12233                         path.slots[0]++;
12234                         continue;
12235                 }
12236
12237                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12238                                     key.offset);
12239                 if (ret)
12240                         break;
12241                 path.slots[0]++;
12242         }
12243
12244         btrfs_release_path(&path);
12245         free(buf);
12246         return ret;
12247 }
12248
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes out all extent info, so in that case
 * the extent tree cannot be used as the source.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12265
12266 static void free_roots_info_cache(void)
12267 {
12268         if (!roots_info_cache)
12269                 return;
12270
12271         while (!cache_tree_empty(roots_info_cache)) {
12272                 struct cache_extent *entry;
12273                 struct root_item_info *rii;
12274
12275                 entry = first_cache_extent(roots_info_cache);
12276                 if (!entry)
12277                         break;
12278                 remove_cache_extent(roots_info_cache, entry);
12279                 rii = container_of(entry, struct root_item_info, cache_extent);
12280                 free(rii);
12281         }
12282
12283         free(roots_info_cache);
12284         roots_info_cache = NULL;
12285 }
12286
12287 static int build_roots_info_cache(struct btrfs_fs_info *info)
12288 {
12289         int ret = 0;
12290         struct btrfs_key key;
12291         struct extent_buffer *leaf;
12292         struct btrfs_path path;
12293
12294         if (!roots_info_cache) {
12295                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12296                 if (!roots_info_cache)
12297                         return -ENOMEM;
12298                 cache_tree_init(roots_info_cache);
12299         }
12300
12301         btrfs_init_path(&path);
12302         key.objectid = 0;
12303         key.type = BTRFS_EXTENT_ITEM_KEY;
12304         key.offset = 0;
12305         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12306         if (ret < 0)
12307                 goto out;
12308         leaf = path.nodes[0];
12309
12310         while (1) {
12311                 struct btrfs_key found_key;
12312                 struct btrfs_extent_item *ei;
12313                 struct btrfs_extent_inline_ref *iref;
12314                 int slot = path.slots[0];
12315                 int type;
12316                 u64 flags;
12317                 u64 root_id;
12318                 u8 level;
12319                 struct cache_extent *entry;
12320                 struct root_item_info *rii;
12321
12322                 if (slot >= btrfs_header_nritems(leaf)) {
12323                         ret = btrfs_next_leaf(info->extent_root, &path);
12324                         if (ret < 0) {
12325                                 break;
12326                         } else if (ret) {
12327                                 ret = 0;
12328                                 break;
12329                         }
12330                         leaf = path.nodes[0];
12331                         slot = path.slots[0];
12332                 }
12333
12334                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12335
12336                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12337                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12338                         goto next;
12339
12340                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12341                 flags = btrfs_extent_flags(leaf, ei);
12342
12343                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12344                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12345                         goto next;
12346
12347                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12348                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12349                         level = found_key.offset;
12350                 } else {
12351                         struct btrfs_tree_block_info *binfo;
12352
12353                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12354                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12355                         level = btrfs_tree_block_level(leaf, binfo);
12356                 }
12357
12358                 /*
12359                  * For a root extent, it must be of the following type and the
12360                  * first (and only one) iref in the item.
12361                  */
12362                 type = btrfs_extent_inline_ref_type(leaf, iref);
12363                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12364                         goto next;
12365
12366                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12367                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12368                 if (!entry) {
12369                         rii = malloc(sizeof(struct root_item_info));
12370                         if (!rii) {
12371                                 ret = -ENOMEM;
12372                                 goto out;
12373                         }
12374                         rii->cache_extent.start = root_id;
12375                         rii->cache_extent.size = 1;
12376                         rii->level = (u8)-1;
12377                         entry = &rii->cache_extent;
12378                         ret = insert_cache_extent(roots_info_cache, entry);
12379                         ASSERT(ret == 0);
12380                 } else {
12381                         rii = container_of(entry, struct root_item_info,
12382                                            cache_extent);
12383                 }
12384
12385                 ASSERT(rii->cache_extent.start == root_id);
12386                 ASSERT(rii->cache_extent.size == 1);
12387
12388                 if (level > rii->level || rii->level == (u8)-1) {
12389                         rii->level = level;
12390                         rii->bytenr = found_key.objectid;
12391                         rii->gen = btrfs_extent_generation(leaf, ei);
12392                         rii->node_count = 1;
12393                 } else if (level == rii->level) {
12394                         rii->node_count++;
12395                 }
12396 next:
12397                 path.slots[0]++;
12398         }
12399
12400 out:
12401         btrfs_release_path(&path);
12402
12403         return ret;
12404 }
12405
12406 static int maybe_repair_root_item(struct btrfs_path *path,
12407                                   const struct btrfs_key *root_key,
12408                                   const int read_only_mode)
12409 {
12410         const u64 root_id = root_key->objectid;
12411         struct cache_extent *entry;
12412         struct root_item_info *rii;
12413         struct btrfs_root_item ri;
12414         unsigned long offset;
12415
12416         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12417         if (!entry) {
12418                 fprintf(stderr,
12419                         "Error: could not find extent items for root %llu\n",
12420                         root_key->objectid);
12421                 return -ENOENT;
12422         }
12423
12424         rii = container_of(entry, struct root_item_info, cache_extent);
12425         ASSERT(rii->cache_extent.start == root_id);
12426         ASSERT(rii->cache_extent.size == 1);
12427
12428         if (rii->node_count != 1) {
12429                 fprintf(stderr,
12430                         "Error: could not find btree root extent for root %llu\n",
12431                         root_id);
12432                 return -ENOENT;
12433         }
12434
12435         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12436         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12437
12438         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12439             btrfs_root_level(&ri) != rii->level ||
12440             btrfs_root_generation(&ri) != rii->gen) {
12441
12442                 /*
12443                  * If we're in repair mode but our caller told us to not update
12444                  * the root item, i.e. just check if it needs to be updated, don't
12445                  * print this message, since the caller will call us again shortly
12446                  * for the same root item without read only mode (the caller will
12447                  * open a transaction first).
12448                  */
12449                 if (!(read_only_mode && repair))
12450                         fprintf(stderr,
12451                                 "%sroot item for root %llu,"
12452                                 " current bytenr %llu, current gen %llu, current level %u,"
12453                                 " new bytenr %llu, new gen %llu, new level %u\n",
12454                                 (read_only_mode ? "" : "fixing "),
12455                                 root_id,
12456                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12457                                 btrfs_root_level(&ri),
12458                                 rii->bytenr, rii->gen, rii->level);
12459
12460                 if (btrfs_root_generation(&ri) > rii->gen) {
12461                         fprintf(stderr,
12462                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12463                                 root_id, btrfs_root_generation(&ri), rii->gen);
12464                         return -EINVAL;
12465                 }
12466
12467                 if (!read_only_mode) {
12468                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12469                         btrfs_set_root_level(&ri, rii->level);
12470                         btrfs_set_root_generation(&ri, rii->gen);
12471                         write_extent_buffer(path->nodes[0], &ri,
12472                                             offset, sizeof(ri));
12473                 }
12474
12475                 return 1;
12476         }
12477
12478         return 0;
12479 }
12480
12481 /*
12482  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12483  * caused read-only snapshots to be corrupted if they were created at a moment
12484  * when the source subvolume/snapshot had orphan items. The issue was that the
12485  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12486  * node instead of the post orphan cleanup root node.
12487  * So this function, and its callees, just detects and fixes those cases. Even
12488  * though the regression was for read-only snapshots, this function applies to
12489  * any snapshot/subvolume root.
12490  * This must be run before any other repair code - not doing it so, makes other
12491  * repair code delete or modify backrefs in the extent tree for example, which
12492  * will result in an inconsistent fs after repairing the root items.
12493  */
12494 static int repair_root_items(struct btrfs_fs_info *info)
12495 {
12496         struct btrfs_path path;
12497         struct btrfs_key key;
12498         struct extent_buffer *leaf;
12499         struct btrfs_trans_handle *trans = NULL;
12500         int ret = 0;
12501         int bad_roots = 0;
12502         int need_trans = 0;
12503
12504         btrfs_init_path(&path);
12505
12506         ret = build_roots_info_cache(info);
12507         if (ret)
12508                 goto out;
12509
12510         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12511         key.type = BTRFS_ROOT_ITEM_KEY;
12512         key.offset = 0;
12513
12514 again:
12515         /*
12516          * Avoid opening and committing transactions if a leaf doesn't have
12517          * any root items that need to be fixed, so that we avoid rotating
12518          * backup roots unnecessarily.
12519          */
12520         if (need_trans) {
12521                 trans = btrfs_start_transaction(info->tree_root, 1);
12522                 if (IS_ERR(trans)) {
12523                         ret = PTR_ERR(trans);
12524                         goto out;
12525                 }
12526         }
12527
12528         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12529                                 0, trans ? 1 : 0);
12530         if (ret < 0)
12531                 goto out;
12532         leaf = path.nodes[0];
12533
12534         while (1) {
12535                 struct btrfs_key found_key;
12536
12537                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12538                         int no_more_keys = find_next_key(&path, &key);
12539
12540                         btrfs_release_path(&path);
12541                         if (trans) {
12542                                 ret = btrfs_commit_transaction(trans,
12543                                                                info->tree_root);
12544                                 trans = NULL;
12545                                 if (ret < 0)
12546                                         goto out;
12547                         }
12548                         need_trans = 0;
12549                         if (no_more_keys)
12550                                 break;
12551                         goto again;
12552                 }
12553
12554                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12555
12556                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12557                         goto next;
12558                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12559                         goto next;
12560
12561                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12562                 if (ret < 0)
12563                         goto out;
12564                 if (ret) {
12565                         if (!trans && repair) {
12566                                 need_trans = 1;
12567                                 key = found_key;
12568                                 btrfs_release_path(&path);
12569                                 goto again;
12570                         }
12571                         bad_roots++;
12572                 }
12573 next:
12574                 path.slots[0]++;
12575         }
12576         ret = 0;
12577 out:
12578         free_roots_info_cache();
12579         btrfs_release_path(&path);
12580         if (trans)
12581                 btrfs_commit_transaction(trans, info->tree_root);
12582         if (ret < 0)
12583                 return ret;
12584
12585         return bad_roots;
12586 }
12587
12588 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12589 {
12590         struct btrfs_trans_handle *trans;
12591         struct btrfs_block_group_cache *bg_cache;
12592         u64 current = 0;
12593         int ret = 0;
12594
12595         /* Clear all free space cache inodes and its extent data */
12596         while (1) {
12597                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12598                 if (!bg_cache)
12599                         break;
12600                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12601                 if (ret < 0)
12602                         return ret;
12603                 current = bg_cache->key.objectid + bg_cache->key.offset;
12604         }
12605
12606         /* Don't forget to set cache_generation to -1 */
12607         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12608         if (IS_ERR(trans)) {
12609                 error("failed to update super block cache generation");
12610                 return PTR_ERR(trans);
12611         }
12612         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12613         btrfs_commit_transaction(trans, fs_info->tree_root);
12614
12615         return ret;
12616 }
12617
/*
 * Help text for "btrfs check", handed to usage() by cmd_check().
 * NULL-terminated list of lines.
 */
const char * const cmd_check_usage[] = {
        /* Synopsis and short description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* Long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",

        NULL
};
12648
12649 int cmd_check(int argc, char **argv)
12650 {
12651         struct cache_tree root_cache;
12652         struct btrfs_root *root;
12653         struct btrfs_fs_info *info;
12654         u64 bytenr = 0;
12655         u64 subvolid = 0;
12656         u64 tree_root_bytenr = 0;
12657         u64 chunk_root_bytenr = 0;
12658         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12659         int ret;
12660         int err = 0;
12661         u64 num;
12662         int init_csum_tree = 0;
12663         int readonly = 0;
12664         int clear_space_cache = 0;
12665         int qgroup_report = 0;
12666         int qgroups_repaired = 0;
12667         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12668
12669         while(1) {
12670                 int c;
12671                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12672                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12673                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12674                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12675                 static const struct option long_options[] = {
12676                         { "super", required_argument, NULL, 's' },
12677                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12678                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12679                         { "init-csum-tree", no_argument, NULL,
12680                                 GETOPT_VAL_INIT_CSUM },
12681                         { "init-extent-tree", no_argument, NULL,
12682                                 GETOPT_VAL_INIT_EXTENT },
12683                         { "check-data-csum", no_argument, NULL,
12684                                 GETOPT_VAL_CHECK_CSUM },
12685                         { "backup", no_argument, NULL, 'b' },
12686                         { "subvol-extents", required_argument, NULL, 'E' },
12687                         { "qgroup-report", no_argument, NULL, 'Q' },
12688                         { "tree-root", required_argument, NULL, 'r' },
12689                         { "chunk-root", required_argument, NULL,
12690                                 GETOPT_VAL_CHUNK_TREE },
12691                         { "progress", no_argument, NULL, 'p' },
12692                         { "mode", required_argument, NULL,
12693                                 GETOPT_VAL_MODE },
12694                         { "clear-space-cache", required_argument, NULL,
12695                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12696                         { NULL, 0, NULL, 0}
12697                 };
12698
12699                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12700                 if (c < 0)
12701                         break;
12702                 switch(c) {
12703                         case 'a': /* ignored */ break;
12704                         case 'b':
12705                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12706                                 break;
12707                         case 's':
12708                                 num = arg_strtou64(optarg);
12709                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12710                                         error(
12711                                         "super mirror should be less than %d",
12712                                                 BTRFS_SUPER_MIRROR_MAX);
12713                                         exit(1);
12714                                 }
12715                                 bytenr = btrfs_sb_offset(((int)num));
12716                                 printf("using SB copy %llu, bytenr %llu\n", num,
12717                                        (unsigned long long)bytenr);
12718                                 break;
12719                         case 'Q':
12720                                 qgroup_report = 1;
12721                                 break;
12722                         case 'E':
12723                                 subvolid = arg_strtou64(optarg);
12724                                 break;
12725                         case 'r':
12726                                 tree_root_bytenr = arg_strtou64(optarg);
12727                                 break;
12728                         case GETOPT_VAL_CHUNK_TREE:
12729                                 chunk_root_bytenr = arg_strtou64(optarg);
12730                                 break;
12731                         case 'p':
12732                                 ctx.progress_enabled = true;
12733                                 break;
12734                         case '?':
12735                         case 'h':
12736                                 usage(cmd_check_usage);
12737                         case GETOPT_VAL_REPAIR:
12738                                 printf("enabling repair mode\n");
12739                                 repair = 1;
12740                                 ctree_flags |= OPEN_CTREE_WRITES;
12741                                 break;
12742                         case GETOPT_VAL_READONLY:
12743                                 readonly = 1;
12744                                 break;
12745                         case GETOPT_VAL_INIT_CSUM:
12746                                 printf("Creating a new CRC tree\n");
12747                                 init_csum_tree = 1;
12748                                 repair = 1;
12749                                 ctree_flags |= OPEN_CTREE_WRITES;
12750                                 break;
12751                         case GETOPT_VAL_INIT_EXTENT:
12752                                 init_extent_tree = 1;
12753                                 ctree_flags |= (OPEN_CTREE_WRITES |
12754                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12755                                 repair = 1;
12756                                 break;
12757                         case GETOPT_VAL_CHECK_CSUM:
12758                                 check_data_csum = 1;
12759                                 break;
12760                         case GETOPT_VAL_MODE:
12761                                 check_mode = parse_check_mode(optarg);
12762                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12763                                         error("unknown mode: %s", optarg);
12764                                         exit(1);
12765                                 }
12766                                 break;
12767                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12768                                 if (strcmp(optarg, "v1") == 0) {
12769                                         clear_space_cache = 1;
12770                                 } else if (strcmp(optarg, "v2") == 0) {
12771                                         clear_space_cache = 2;
12772                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12773                                 } else {
12774                                         error(
12775                 "invalid argument to --clear-space-cache, must be v1 or v2");
12776                                         exit(1);
12777                                 }
12778                                 ctree_flags |= OPEN_CTREE_WRITES;
12779                                 break;
12780                 }
12781         }
12782
12783         if (check_argc_exact(argc - optind, 1))
12784                 usage(cmd_check_usage);
12785
12786         if (ctx.progress_enabled) {
12787                 ctx.tp = TASK_NOTHING;
12788                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12789         }
12790
12791         /* This check is the only reason for --readonly to exist */
12792         if (readonly && repair) {
12793                 error("repair options are not compatible with --readonly");
12794                 exit(1);
12795         }
12796
12797         /*
12798          * Not supported yet
12799          */
12800         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12801                 error("low memory mode doesn't support repair yet");
12802                 exit(1);
12803         }
12804
12805         radix_tree_init();
12806         cache_tree_init(&root_cache);
12807
12808         if((ret = check_mounted(argv[optind])) < 0) {
12809                 error("could not check mount status: %s", strerror(-ret));
12810                 err |= !!ret;
12811                 goto err_out;
12812         } else if(ret) {
12813                 error("%s is currently mounted, aborting", argv[optind]);
12814                 ret = -EBUSY;
12815                 err |= !!ret;
12816                 goto err_out;
12817         }
12818
12819         /* only allow partial opening under repair mode */
12820         if (repair)
12821                 ctree_flags |= OPEN_CTREE_PARTIAL;
12822
12823         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12824                                   chunk_root_bytenr, ctree_flags);
12825         if (!info) {
12826                 error("cannot open file system");
12827                 ret = -EIO;
12828                 err |= !!ret;
12829                 goto err_out;
12830         }
12831
12832         global_info = info;
12833         root = info->fs_root;
12834         if (clear_space_cache == 1) {
12835                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12836                         error(
12837                 "free space cache v2 detected, use --clear-space-cache v2");
12838                         ret = 1;
12839                         goto close_out;
12840                 }
12841                 printf("Clearing free space cache\n");
12842                 ret = clear_free_space_cache(info);
12843                 if (ret) {
12844                         error("failed to clear free space cache");
12845                         ret = 1;
12846                 } else {
12847                         printf("Free space cache cleared\n");
12848                 }
12849                 goto close_out;
12850         } else if (clear_space_cache == 2) {
12851                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12852                         printf("no free space cache v2 to clear\n");
12853                         ret = 0;
12854                         goto close_out;
12855                 }
12856                 printf("Clear free space cache v2\n");
12857                 ret = btrfs_clear_free_space_tree(info);
12858                 if (ret) {
12859                         error("failed to clear free space cache v2: %d", ret);
12860                         ret = 1;
12861                 } else {
12862                         printf("free space cache v2 cleared\n");
12863                 }
12864                 goto close_out;
12865         }
12866
12867         /*
12868          * repair mode will force us to commit transaction which
12869          * will make us fail to load log tree when mounting.
12870          */
12871         if (repair && btrfs_super_log_root(info->super_copy)) {
12872                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12873                 if (!ret) {
12874                         ret = 1;
12875                         err |= !!ret;
12876                         goto close_out;
12877                 }
12878                 ret = zero_log_tree(root);
12879                 err |= !!ret;
12880                 if (ret) {
12881                         error("failed to zero log tree: %d", ret);
12882                         goto close_out;
12883                 }
12884         }
12885
12886         uuid_unparse(info->super_copy->fsid, uuidbuf);
12887         if (qgroup_report) {
12888                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12889                        uuidbuf);
12890                 ret = qgroup_verify_all(info);
12891                 err |= !!ret;
12892                 if (ret == 0)
12893                         report_qgroups(1);
12894                 goto close_out;
12895         }
12896         if (subvolid) {
12897                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12898                        subvolid, argv[optind], uuidbuf);
12899                 ret = print_extent_state(info, subvolid);
12900                 err |= !!ret;
12901                 goto close_out;
12902         }
12903         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12904
12905         if (!extent_buffer_uptodate(info->tree_root->node) ||
12906             !extent_buffer_uptodate(info->dev_root->node) ||
12907             !extent_buffer_uptodate(info->chunk_root->node)) {
12908                 error("critical roots corrupted, unable to check the filesystem");
12909                 err |= !!ret;
12910                 ret = -EIO;
12911                 goto close_out;
12912         }
12913
12914         if (init_extent_tree || init_csum_tree) {
12915                 struct btrfs_trans_handle *trans;
12916
12917                 trans = btrfs_start_transaction(info->extent_root, 0);
12918                 if (IS_ERR(trans)) {
12919                         error("error starting transaction");
12920                         ret = PTR_ERR(trans);
12921                         err |= !!ret;
12922                         goto close_out;
12923                 }
12924
12925                 if (init_extent_tree) {
12926                         printf("Creating a new extent tree\n");
12927                         ret = reinit_extent_tree(trans, info);
12928                         err |= !!ret;
12929                         if (ret)
12930                                 goto close_out;
12931                 }
12932
12933                 if (init_csum_tree) {
12934                         printf("Reinitialize checksum tree\n");
12935                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12936                         if (ret) {
12937                                 error("checksum tree initialization failed: %d",
12938                                                 ret);
12939                                 ret = -EIO;
12940                                 err |= !!ret;
12941                                 goto close_out;
12942                         }
12943
12944                         ret = fill_csum_tree(trans, info->csum_root,
12945                                              init_extent_tree);
12946                         err |= !!ret;
12947                         if (ret) {
12948                                 error("checksum tree refilling failed: %d", ret);
12949                                 return -EIO;
12950                         }
12951                 }
12952                 /*
12953                  * Ok now we commit and run the normal fsck, which will add
12954                  * extent entries for all of the items it finds.
12955                  */
12956                 ret = btrfs_commit_transaction(trans, info->extent_root);
12957                 err |= !!ret;
12958                 if (ret)
12959                         goto close_out;
12960         }
12961         if (!extent_buffer_uptodate(info->extent_root->node)) {
12962                 error("critical: extent_root, unable to check the filesystem");
12963                 ret = -EIO;
12964                 err |= !!ret;
12965                 goto close_out;
12966         }
12967         if (!extent_buffer_uptodate(info->csum_root->node)) {
12968                 error("critical: csum_root, unable to check the filesystem");
12969                 ret = -EIO;
12970                 err |= !!ret;
12971                 goto close_out;
12972         }
12973
12974         if (!ctx.progress_enabled)
12975                 fprintf(stderr, "checking extents\n");
12976         if (check_mode == CHECK_MODE_LOWMEM)
12977                 ret = check_chunks_and_extents_v2(root);
12978         else
12979                 ret = check_chunks_and_extents(root);
12980         err |= !!ret;
12981         if (ret)
12982                 error(
12983                 "errors found in extent allocation tree or chunk allocation");
12984
12985         ret = repair_root_items(info);
12986         err |= !!ret;
12987         if (ret < 0) {
12988                 error("failed to repair root items: %s", strerror(-ret));
12989                 goto close_out;
12990         }
12991         if (repair) {
12992                 fprintf(stderr, "Fixed %d roots.\n", ret);
12993                 ret = 0;
12994         } else if (ret > 0) {
12995                 fprintf(stderr,
12996                        "Found %d roots with an outdated root item.\n",
12997                        ret);
12998                 fprintf(stderr,
12999                         "Please run a filesystem check with the option --repair to fix them.\n");
13000                 ret = 1;
13001                 err |= !!ret;
13002                 goto close_out;
13003         }
13004
13005         if (!ctx.progress_enabled) {
13006                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13007                         fprintf(stderr, "checking free space tree\n");
13008                 else
13009                         fprintf(stderr, "checking free space cache\n");
13010         }
13011         ret = check_space_cache(root);
13012         err |= !!ret;
13013         if (ret) {
13014                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13015                         error("errors found in free space tree");
13016                 else
13017                         error("errors found in free space cache");
13018                 goto out;
13019         }
13020
13021         /*
13022          * We used to have to have these hole extents in between our real
13023          * extents so if we don't have this flag set we need to make sure there
13024          * are no gaps in the file extents for inodes, otherwise we can just
13025          * ignore it when this happens.
13026          */
13027         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13028         if (!ctx.progress_enabled)
13029                 fprintf(stderr, "checking fs roots\n");
13030         if (check_mode == CHECK_MODE_LOWMEM)
13031                 ret = check_fs_roots_v2(root->fs_info);
13032         else
13033                 ret = check_fs_roots(root, &root_cache);
13034         err |= !!ret;
13035         if (ret) {
13036                 error("errors found in fs roots");
13037                 goto out;
13038         }
13039
13040         fprintf(stderr, "checking csums\n");
13041         ret = check_csums(root);
13042         err |= !!ret;
13043         if (ret) {
13044                 error("errors found in csum tree");
13045                 goto out;
13046         }
13047
13048         fprintf(stderr, "checking root refs\n");
13049         /* For low memory mode, check_fs_roots_v2 handles root refs */
13050         if (check_mode != CHECK_MODE_LOWMEM) {
13051                 ret = check_root_refs(root, &root_cache);
13052                 err |= !!ret;
13053                 if (ret) {
13054                         error("errors found in root refs");
13055                         goto out;
13056                 }
13057         }
13058
13059         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13060                 struct extent_buffer *eb;
13061
13062                 eb = list_first_entry(&root->fs_info->recow_ebs,
13063                                       struct extent_buffer, recow);
13064                 list_del_init(&eb->recow);
13065                 ret = recow_extent_buffer(root, eb);
13066                 err |= !!ret;
13067                 if (ret) {
13068                         error("fails to fix transid errors");
13069                         break;
13070                 }
13071         }
13072
13073         while (!list_empty(&delete_items)) {
13074                 struct bad_item *bad;
13075
13076                 bad = list_first_entry(&delete_items, struct bad_item, list);
13077                 list_del_init(&bad->list);
13078                 if (repair) {
13079                         ret = delete_bad_item(root, bad);
13080                         err |= !!ret;
13081                 }
13082                 free(bad);
13083         }
13084
13085         if (info->quota_enabled) {
13086                 fprintf(stderr, "checking quota groups\n");
13087                 ret = qgroup_verify_all(info);
13088                 err |= !!ret;
13089                 if (ret) {
13090                         error("failed to check quota groups");
13091                         goto out;
13092                 }
13093                 report_qgroups(0);
13094                 ret = repair_qgroups(info, &qgroups_repaired);
13095                 err |= !!ret;
13096                 if (err) {
13097                         error("failed to repair quota groups");
13098                         goto out;
13099                 }
13100                 ret = 0;
13101         }
13102
13103         if (!list_empty(&root->fs_info->recow_ebs)) {
13104                 error("transid errors in file system");
13105                 ret = 1;
13106                 err |= !!ret;
13107         }
13108 out:
13109         if (found_old_backref) { /*
13110                  * there was a disk format change when mixed
13111                  * backref was in testing tree. The old format
13112                  * existed about one week.
13113                  */
13114                 printf("\n * Found old mixed backref format. "
13115                        "The old format is not supported! *"
13116                        "\n * Please mount the FS in readonly mode, "
13117                        "backup data and re-format the FS. *\n\n");
13118                 err |= 1;
13119         }
13120         printf("found %llu bytes used, ",
13121                (unsigned long long)bytes_used);
13122         if (err)
13123                 printf("error(s) found\n");
13124         else
13125                 printf("no error found\n");
13126         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13127         printf("total tree bytes: %llu\n",
13128                (unsigned long long)total_btree_bytes);
13129         printf("total fs tree bytes: %llu\n",
13130                (unsigned long long)total_fs_tree_bytes);
13131         printf("total extent tree bytes: %llu\n",
13132                (unsigned long long)total_extent_tree_bytes);
13133         printf("btree space waste bytes: %llu\n",
13134                (unsigned long long)btree_space_waste);
13135         printf("file data blocks allocated: %llu\n referenced %llu\n",
13136                 (unsigned long long)data_bytes_allocated,
13137                 (unsigned long long)data_bytes_referenced);
13138
13139         free_qgroup_counts();
13140         free_root_recs_tree(&root_cache);
13141 close_out:
13142         close_ctree(root);
13143 err_out:
13144         if (ctx.progress_enabled)
13145                 task_deinit(ctx.info);
13146
13147         return err;
13148 }